1 //===-BlockGenerators.h - Helper to generate code for statements-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the BlockGenerator and VectorBlockGenerator classes, which
10 // generate sequential code and vectorized code for a polyhedral statement,
11 // respectively.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef POLLY_BLOCK_GENERATORS_H
16 #define POLLY_BLOCK_GENERATORS_H
17 
18 #include "polly/CodeGen/IRBuilder.h"
19 #include "polly/Support/ScopHelper.h"
20 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
21 #include "isl/isl-noexceptions.h"
22 
23 namespace polly {
24 using namespace llvm;
25 class MemoryAccess;
26 class ScopArrayInfo;
27 class IslExprBuilder;
28 
29 /// Generate a new basic block for a polyhedral statement.
30 class BlockGenerator {
31 public:
32   typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT;
33 
34   /// Map types to resolve scalar dependences.
35   ///
36   ///@{
37   using AllocaMapTy = DenseMap<const ScopArrayInfo *, AssertingVH<AllocaInst>>;
38 
39   /// Simple vector of instructions to store escape users.
40   using EscapeUserVectorTy = SmallVector<Instruction *, 4>;
41 
42   /// Map type to resolve escaping users for scalar instructions.
43   ///
44   /// @see The EscapeMap member.
45   using EscapeUsersAllocaMapTy =
46       MapVector<Instruction *,
47                 std::pair<AssertingVH<Value>, EscapeUserVectorTy>>;
48 
49   ///@}
50 
51   /// Create a generator for basic blocks.
52   ///
53   /// @param Builder     The LLVM-IR Builder used to generate the statement. The
54   ///                    code is generated at the location, the Builder points
55   ///                    to.
56   /// @param LI          The loop info for the current function
57   /// @param SE          The scalar evolution info for the current function
58   /// @param DT          The dominator tree of this function.
59   /// @param ScalarMap   Map from scalars to their demoted location.
60   /// @param EscapeMap   Map from scalars to their escape users and locations.
61   /// @param GlobalMap   A mapping from llvm::Values used in the original scop
62   ///                    region to a new set of llvm::Values. Each reference to
63   ///                    an original value appearing in this mapping is replaced
64   ///                    with the new value it is mapped to.
65   /// @param ExprBuilder An expression builder to generate new access functions.
66   /// @param StartBlock  The first basic block after the RTC.
67   BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE,
68                  DominatorTree &DT, AllocaMapTy &ScalarMap,
69                  EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap,
70                  IslExprBuilder *ExprBuilder, BasicBlock *StartBlock);
71 
72   /// Copy the basic block.
73   ///
74   /// This copies the entire basic block and updates references to old values
75   /// with references to new values, as defined by GlobalMap.
76   ///
77   /// @param Stmt        The block statement to code generate.
78   /// @param LTS         A map from old loops to new induction variables as
79   ///                    SCEVs.
80   /// @param NewAccesses A map from memory access ids to new ast expressions,
81   ///                    which may contain new access expressions for certain
82   ///                    memory accesses.
83   void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
84                 isl_id_to_ast_expr *NewAccesses);
85 
86   /// Remove a ScopArrayInfo's allocation from the ScalarMap.
87   ///
88   /// This function allows to remove values from the ScalarMap. This is useful
89   /// if the corresponding alloca instruction will be deleted (or moved into
90   /// another module), as without removing these values the underlying
91   /// AssertingVH will trigger due to us still keeping reference to this
92   /// scalar.
93   ///
94   /// @param Array The array for which the alloca was generated.
freeScalarAlloc(ScopArrayInfo * Array)95   void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); }
96 
97   /// Return the alloca for @p Access.
98   ///
99   /// If no alloca was mapped for @p Access a new one is created.
100   ///
101   /// @param Access    The memory access for which to generate the alloca.
102   ///
103   /// @returns The alloca for @p Access or a replacement value taken from
104   ///          GlobalMap.
105   Value *getOrCreateAlloca(const MemoryAccess &Access);
106 
107   /// Return the alloca for @p Array.
108   ///
109   /// If no alloca was mapped for @p Array a new one is created.
110   ///
111   /// @param Array The array for which to generate the alloca.
112   ///
113   /// @returns The alloca for @p Array or a replacement value taken from
114   ///          GlobalMap.
115   Value *getOrCreateAlloca(const ScopArrayInfo *Array);
116 
117   /// Finalize the code generation for the SCoP @p S.
118   ///
119   /// This will initialize and finalize the scalar variables we demoted during
120   /// the code generation.
121   ///
122   /// @see createScalarInitialization(Scop &)
123   /// @see createScalarFinalization(Region &)
124   void finalizeSCoP(Scop &S);
125 
126   /// An empty destructor
~BlockGenerator()127   virtual ~BlockGenerator() {}
128 
129   BlockGenerator(const BlockGenerator &) = default;
130 
131 protected:
132   PollyIRBuilder &Builder;
133   LoopInfo &LI;
134   ScalarEvolution &SE;
135   IslExprBuilder *ExprBuilder;
136 
137   /// The dominator tree of this function.
138   DominatorTree &DT;
139 
140   /// The entry block of the current function.
141   BasicBlock *EntryBB;
142 
143   /// Map to resolve scalar dependences for PHI operands and scalars.
144   ///
145   /// When translating code that contains scalar dependences as they result from
146   /// inter-block scalar dependences (including the use of data carrying PHI
147   /// nodes), we do not directly regenerate in-register SSA code, but instead
148   /// allocate some stack memory through which these scalar values are passed.
149   /// Only a later pass of -mem2reg will then (re)introduce in-register
150   /// computations.
151   ///
152   /// To keep track of the memory location(s) used to store the data computed by
153   /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a
154   /// given ScopArrayInfo to the junk of stack allocated memory, that is
155   /// used for code generation.
156   ///
157   /// Up to two different ScopArrayInfo objects are associated with each
158   /// llvm::Value:
159   ///
160   /// MemoryType::Value objects are used for normal scalar dependences that go
161   /// from a scalar definition to its use. Such dependences are lowered by
162   /// directly writing the value an instruction computes into the corresponding
163   /// chunk of memory and reading it back from this chunk of memory right before
164   /// every use of this original scalar value. The memory allocations for
165   /// MemoryType::Value objects end with '.s2a'.
166   ///
167   /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI
168   /// nodes. For each PHI nodes we introduce, besides the Array of type
169   /// MemoryType::Value, a second chunk of memory into which we write at the end
170   /// of each basic block preceding the PHI instruction the value passed
171   /// through this basic block. At the place where the PHI node is executed, we
172   /// replace the PHI node with a load from the corresponding MemoryType::PHI
173   /// memory location. The memory allocations for MemoryType::PHI end with
174   /// '.phiops'.
175   ///
176   /// Example:
177   ///
178   ///                              Input C Code
179   ///                              ============
180   ///
181   ///                 S1:      x1 = ...
182   ///                          for (i=0...N) {
183   ///                 S2:           x2 = phi(x1, add)
184   ///                 S3:           add = x2 + 42;
185   ///                          }
186   ///                 S4:      print(x1)
187   ///                          print(x2)
188   ///                          print(add)
189   ///
190   ///
191   ///        Unmodified IR                         IR After expansion
192   ///        =============                         ==================
193   ///
194   /// S1:   x1 = ...                     S1:    x1 = ...
195   ///                                           x1.s2a = s1
196   ///                                           x2.phiops = s1
197   ///        |                                    |
198   ///        |   <--<--<--<--<                    |   <--<--<--<--<
199   ///        | /              \                   | /              \     .
200   ///        V V               \                  V V               \    .
201   /// S2:  x2 = phi (x1, add)   |        S2:    x2 = x2.phiops       |
202   ///                           |               x2.s2a = x2          |
203   ///                           |                                    |
204   /// S3:  add = x2 + 42        |        S3:    add = x2 + 42        |
205   ///                           |               add.s2a = add        |
206   ///                           |               x2.phiops = add      |
207   ///        | \               /                  | \               /
208   ///        |  \             /                   |  \             /
209   ///        |   >-->-->-->-->                    |   >-->-->-->-->
210   ///        V                                    V
211   ///
212   ///                                    S4:    x1 = x1.s2a
213   /// S4:  ... = x1                             ... = x1
214   ///                                           x2 = x2.s2a
215   ///      ... = x2                             ... = x2
216   ///                                           add = add.s2a
217   ///      ... = add                            ... = add
218   ///
219   ///      ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a,
220   ///                    add:Value -> add.s2a, x2:PHI -> x2.phiops }
221   ///
222   ///  ??? Why does a PHI-node require two memory chunks ???
223   ///
224   ///  One may wonder why a PHI node requires two memory chunks and not just
225   ///  all data is stored in a single location. The following example tries
226   ///  to store all data in .s2a and drops the .phiops location:
227   ///
228   ///      S1:    x1 = ...
229   ///             x1.s2a = s1
230   ///             x2.s2a = s1             // use .s2a instead of .phiops
231   ///               |
232   ///               |   <--<--<--<--<
233   ///               | /              \    .
234   ///               V V               \   .
235   ///      S2:    x2 = x2.s2a          |  // value is same as above, but read
236   ///                                  |  // from .s2a
237   ///                                  |
238   ///             x2.s2a = x2          |  // store into .s2a as normal
239   ///                                  |
240   ///      S3:    add = x2 + 42        |
241   ///             add.s2a = add        |
242   ///             x2.s2a = add         |  // use s2a instead of .phiops
243   ///               | \               /   // !!! This is wrong, as x2.s2a now
244   ///               |   >-->-->-->-->     // contains add instead of x2.
245   ///               V
246   ///
247   ///      S4:    x1 = x1.s2a
248   ///             ... = x1
249   ///             x2 = x2.s2a             // !!! We now read 'add' instead of
250   ///             ... = x2                // 'x2'
251   ///             add = add.s2a
252   ///             ... = add
253   ///
254   ///  As visible in the example, the SSA value of the PHI node may still be
255   ///  needed _after_ the basic block, which could conceptually branch to the
256   ///  PHI node, has been run and has overwritten the PHI's old value. Hence, a
257   ///  single memory location is not enough to code-generate a PHI node.
258   ///
259   /// Memory locations used for the special PHI node modeling.
260   AllocaMapTy &ScalarMap;
261 
262   /// Map from instructions to their escape users as well as the alloca.
263   EscapeUsersAllocaMapTy &EscapeMap;
264 
265   /// A map from llvm::Values referenced in the old code to a new set of
266   ///        llvm::Values, which is used to replace these old values during
267   ///        code generation.
268   ValueMapT &GlobalMap;
269 
270   /// The first basic block after the RTC.
271   BasicBlock *StartBlock;
272 
273   /// Split @p BB to create a new one we can use to clone @p BB in.
274   BasicBlock *splitBB(BasicBlock *BB);
275 
276   /// Copy the given basic block.
277   ///
278   /// @param Stmt      The statement to code generate.
279   /// @param BB        The basic block to code generate.
280   /// @param BBMap     A mapping from old values to their new values in this
281   /// block.
282   /// @param LTS         A map from old loops to new induction variables as
283   ///                    SCEVs.
284   /// @param NewAccesses A map from memory access ids to new ast expressions,
285   ///                    which may contain new access expressions for certain
286   ///                    memory accesses.
287   ///
288   /// @returns The copy of the basic block.
289   BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap,
290                      LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
291 
292   /// Copy the given basic block.
293   ///
294   /// @param Stmt      The statement to code generate.
295   /// @param BB        The basic block to code generate.
296   /// @param BBCopy    The new basic block to generate code in.
297   /// @param BBMap     A mapping from old values to their new values in this
298   /// block.
299   /// @param LTS         A map from old loops to new induction variables as
300   ///                    SCEVs.
301   /// @param NewAccesses A map from memory access ids to new ast expressions,
302   ///                    which may contain new access expressions for certain
303   ///                    memory accesses.
304   void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy,
305               ValueMapT &BBMap, LoopToScevMapT &LTS,
306               isl_id_to_ast_expr *NewAccesses);
307 
308   /// Generate reload of scalars demoted to memory and needed by @p Stmt.
309   ///
310   /// @param Stmt  The statement we generate code for.
311   /// @param LTS   A mapping from loops virtual canonical induction
312   ///              variable to their new values.
313   /// @param BBMap A mapping from old values to their new values in this block.
314   /// @param NewAccesses A map from memory access ids to new ast expressions.
315   void generateScalarLoads(ScopStmt &Stmt, LoopToScevMapT &LTS,
316                            ValueMapT &BBMap,
317                            __isl_keep isl_id_to_ast_expr *NewAccesses);
318 
319   /// When statement tracing is enabled, build the print instructions for
320   /// printing the current statement instance.
321   ///
322   /// The printed output looks like:
323   ///
324   ///     Stmt1(0)
325   ///
326   /// If printing of scalars is enabled, it also appends the value of each
327   /// scalar to the line:
328   ///
329   ///     Stmt1(0) %i=1 %sum=5
330   ///
331   /// @param Stmt  The statement we generate code for.
332   /// @param LTS   A mapping from loops virtual canonical induction
333   ///              variable to their new values.
334   /// @param BBMap A mapping from old values to their new values in this block.
335   void generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
336                               ValueMapT &BBMap);
337 
338   /// Generate instructions that compute whether one instance of @p Set is
339   /// executed.
340   ///
341   /// @param Stmt      The statement we generate code for.
342   /// @param Subdomain A set in the space of @p Stmt's domain. Elements not in
343   ///                  @p Stmt's domain are ignored.
344   ///
345   /// @return An expression of type i1, generated into the current builder
346   ///         position, that evaluates to 1 if the executed instance is part of
347   ///         @p Set.
348   Value *buildContainsCondition(ScopStmt &Stmt, const isl::set &Subdomain);
349 
350   /// Generate code that executes in a subset of @p Stmt's domain.
351   ///
352   /// @param Stmt        The statement we generate code for.
353   /// @param Subdomain   The condition for some code to be executed.
354   /// @param Subject     A name for the code that is executed
355   ///                    conditionally. Used to name new basic blocks and
356   ///                    instructions.
357   /// @param GenThenFunc Callback which generates the code to be executed
358   ///                    when the current executed instance is in @p Set. The
359   ///                    IRBuilder's position is moved to within the block that
360   ///                    executes conditionally for this callback.
361   void generateConditionalExecution(ScopStmt &Stmt, const isl::set &Subdomain,
362                                     StringRef Subject,
363                                     const std::function<void()> &GenThenFunc);
364 
365   /// Generate the scalar stores for the given statement.
366   ///
367   /// After the statement @p Stmt was copied all inner-SCoP scalar dependences
368   /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
369   /// be demoted to memory.
370   ///
371   /// @param Stmt  The statement we generate code for.
372   /// @param LTS   A mapping from loops virtual canonical induction
373   ///              variable to their new values
374   ///              (for values recalculated in the new ScoP, but not
375   ///               within this basic block)
376   /// @param BBMap A mapping from old values to their new values in this block.
377   /// @param NewAccesses A map from memory access ids to new ast expressions.
378   virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
379                                     ValueMapT &BBMap,
380                                     __isl_keep isl_id_to_ast_expr *NewAccesses);
381 
382   /// Handle users of @p Array outside the SCoP.
383   ///
384   /// @param S         The current SCoP.
385   /// @param Inst      The ScopArrayInfo to handle.
386   void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array);
387 
388   /// Find scalar statements that have outside users.
389   ///
390   /// We register these scalar values to later update subsequent scalar uses of
391   /// these values to either use the newly computed value from within the scop
392   /// (if the scop was executed) or the unchanged original code (if the run-time
393   /// check failed).
394   ///
395   /// @param S The scop for which to find the outside users.
396   void findOutsideUsers(Scop &S);
397 
398   /// Initialize the memory of demoted scalars.
399   ///
400   /// @param S The scop for which to generate the scalar initializers.
401   void createScalarInitialization(Scop &S);
402 
403   /// Create exit PHI node merges for PHI nodes with more than two edges
404   ///        from inside the scop.
405   ///
406   /// For scops which have a PHI node in the exit block that has more than two
407   /// incoming edges from inside the scop region, we require some special
408   /// handling to understand which of the possible values will be passed to the
409   /// PHI node from inside the optimized version of the scop. To do so ScopInfo
410   /// models the possible incoming values as write accesses of the ScopStmts.
411   ///
412   /// This function creates corresponding code to reload the computed outgoing
413   /// value from the stack slot it has been stored into and to pass it on to the
414   /// PHI node in the original exit block.
415   ///
416   /// @param S The scop for which to generate the exiting PHI nodes.
417   void createExitPHINodeMerges(Scop &S);
418 
419   /// Promote the values of demoted scalars after the SCoP.
420   ///
421   /// If a scalar value was used outside the SCoP we need to promote the value
422   /// stored in the memory cell allocated for that scalar and combine it with
423   /// the original value in the non-optimized SCoP.
424   void createScalarFinalization(Scop &S);
425 
426   /// Try to synthesize a new value
427   ///
428   /// Given an old value, we try to synthesize it in a new context from its
429   /// original SCEV expression. We start from the original SCEV expression,
430   /// then replace outdated parameter and loop references, and finally
431   /// expand it to code that computes this updated expression.
432   ///
433   /// @param Stmt      The statement to code generate
434   /// @param Old       The old Value
435   /// @param BBMap     A mapping from old values to their new values
436   ///                  (for values recalculated within this basic block)
437   /// @param LTS       A mapping from loops virtual canonical induction
438   ///                  variable to their new values
439   ///                  (for values recalculated in the new ScoP, but not
440   ///                   within this basic block)
441   /// @param L         The loop that surrounded the instruction that referenced
442   ///                  this value in the original code. This loop is used to
443   ///                  evaluate the scalar evolution at the right scope.
444   ///
445   /// @returns  o A newly synthesized value.
446   ///           o NULL, if synthesizing the value failed.
447   Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
448                                LoopToScevMapT &LTS, Loop *L) const;
449 
450   /// Get the new version of a value.
451   ///
452   /// Given an old value, we first check if a new version of this value is
453   /// available in the BBMap or GlobalMap. In case it is not and the value can
454   /// be recomputed using SCEV, we do so. If we can not recompute a value
455   /// using SCEV, but we understand that the value is constant within the scop,
456   /// we return the old value.  If the value can still not be derived, this
457   /// function will assert.
458   ///
459   /// @param Stmt      The statement to code generate.
460   /// @param Old       The old Value.
461   /// @param BBMap     A mapping from old values to their new values
462   ///                  (for values recalculated within this basic block).
463   /// @param LTS       A mapping from loops virtual canonical induction
464   ///                  variable to their new values
465   ///                  (for values recalculated in the new ScoP, but not
466   ///                   within this basic block).
467   /// @param L         The loop that surrounded the instruction that referenced
468   ///                  this value in the original code. This loop is used to
469   ///                  evaluate the scalar evolution at the right scope.
470   ///
471   /// @returns  o The old value, if it is still valid.
472   ///           o The new value, if available.
473   ///           o NULL, if no value is found.
474   Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
475                      LoopToScevMapT &LTS, Loop *L) const;
476 
477   void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
478                       LoopToScevMapT &LTS);
479 
480   /// Get the innermost loop that surrounds the statement @p Stmt.
481   Loop *getLoopForStmt(const ScopStmt &Stmt) const;
482 
483   /// Generate the operand address
484   /// @param NewAccesses A map from memory access ids to new ast expressions,
485   ///                    which may contain new access expressions for certain
486   ///                    memory accesses.
487   Value *generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
488                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
489                                   isl_id_to_ast_expr *NewAccesses);
490 
491   /// Generate the operand address.
492   ///
493   /// @param Stmt         The statement to generate code for.
494   /// @param L            The innermost loop that surrounds the statement.
495   /// @param Pointer      If the access expression is not changed (ie. not found
496   ///                     in @p LTS), use this Pointer from the original code
497   ///                     instead.
498   /// @param BBMap        A mapping from old values to their new values.
499   /// @param LTS          A mapping from loops virtual canonical induction
500   ///                     variable to their new values.
501   /// @param NewAccesses  Ahead-of-time generated access expressions.
502   /// @param Id           Identifier of the MemoryAccess to generate.
503   /// @param ExpectedType The type the returned value should have.
504   ///
505   /// @return The generated address.
506   Value *generateLocationAccessed(ScopStmt &Stmt, Loop *L, Value *Pointer,
507                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
508                                   isl_id_to_ast_expr *NewAccesses,
509                                   __isl_take isl_id *Id, Type *ExpectedType);
510 
511   /// Generate the pointer value that is accesses by @p Access.
512   ///
513   /// For write accesses, generate the target address. For read accesses,
514   /// generate the source address.
515   /// The access can be either an array access or a scalar access. In the first
516   /// case, the returned address will point to an element into that array. In
517   /// the scalar case, an alloca is used.
518   /// If a new AccessRelation is set for the MemoryAccess, the new relation will
519   /// be used.
520   ///
521   /// @param Access      The access to generate a pointer for.
522   /// @param L           The innermost loop that surrounds the statement.
523   /// @param LTS         A mapping from loops virtual canonical induction
524   ///                    variable to their new values.
525   /// @param BBMap       A mapping from old values to their new values.
526   /// @param NewAccesses A map from memory access ids to new ast expressions.
527   ///
528   /// @return The generated address.
529   Value *getImplicitAddress(MemoryAccess &Access, Loop *L, LoopToScevMapT &LTS,
530                             ValueMapT &BBMap,
531                             __isl_keep isl_id_to_ast_expr *NewAccesses);
532 
533   /// @param NewAccesses A map from memory access ids to new ast expressions,
534   ///                    which may contain new access expressions for certain
535   ///                    memory accesses.
536   Value *generateArrayLoad(ScopStmt &Stmt, LoadInst *load, ValueMapT &BBMap,
537                            LoopToScevMapT &LTS,
538                            isl_id_to_ast_expr *NewAccesses);
539 
540   /// @param NewAccesses A map from memory access ids to new ast expressions,
541   ///                    which may contain new access expressions for certain
542   ///                    memory accesses.
543   void generateArrayStore(ScopStmt &Stmt, StoreInst *store, ValueMapT &BBMap,
544                           LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
545 
546   /// Copy a single PHI instruction.
547   ///
548   /// The implementation in the BlockGenerator is trivial, however it allows
549   /// subclasses to handle PHIs different.
copyPHIInstruction(ScopStmt &,PHINode *,ValueMapT &,LoopToScevMapT &)550   virtual void copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &,
551                                   LoopToScevMapT &) {}
552 
553   /// Copy a single Instruction.
554   ///
555   /// This copies a single Instruction and updates references to old values
556   /// with references to new values, as defined by GlobalMap and BBMap.
557   ///
558   /// @param Stmt        The statement to code generate.
559   /// @param Inst        The instruction to copy.
560   /// @param BBMap       A mapping from old values to their new values
561   ///                    (for values recalculated within this basic block).
562   /// @param GlobalMap   A mapping from old values to their new values
563   ///                    (for values recalculated in the new ScoP, but not
564   ///                    within this basic block).
565   /// @param LTS         A mapping from loops virtual canonical induction
566   ///                    variable to their new values
567   ///                    (for values recalculated in the new ScoP, but not
568   ///                     within this basic block).
569   /// @param NewAccesses A map from memory access ids to new ast expressions,
570   ///                    which may contain new access expressions for certain
571   ///                    memory accesses.
572   void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
573                        LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
574 
575   /// Helper to determine if @p Inst can be synthesized in @p Stmt.
576   ///
577   /// @returns false, iff @p Inst can be synthesized in @p Stmt.
578   bool canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst);
579 
580   /// Remove dead instructions generated for BB
581   ///
582   /// @param BB The basic block code for which code has been generated.
583   /// @param BBMap A local map from old to new instructions.
584   void removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap);
585 
586   /// Invalidate the scalar evolution expressions for a scop.
587   ///
588   /// This function invalidates the scalar evolution results for all
589   /// instructions that are part of a given scop, and the loops
590   /// surrounding the users of merge blocks. This is necessary to ensure that
591   /// later scops do not obtain scalar evolution expressions that reference
592   /// values that earlier dominated the later scop, but have been moved in the
593   /// conditional part of an earlier scop and consequently do not any more
594   /// dominate the later scop.
595   ///
596   /// @param S The scop to invalidate.
597   void invalidateScalarEvolution(Scop &S);
598 };
599 
600 /// Generate a new vector basic block for a polyhedral statement.
601 ///
602 /// The only public function exposed is generate().
603 class VectorBlockGenerator : BlockGenerator {
604 public:
605   /// Generate a new vector basic block for a ScoPStmt.
606   ///
607   /// This code generation is similar to the normal, scalar code generation,
608   /// except that each instruction is code generated for several vector lanes
609   /// at a time. If possible instructions are issued as actual vector
610   /// instructions, but e.g. for address calculation instructions we currently
611   /// generate scalar instructions for each vector lane.
612   ///
613   /// @param BlockGen    A block generator object used as parent.
614   /// @param Stmt        The statement to code generate.
615   /// @param VLTS        A mapping from loops virtual canonical induction
616   ///                    variable to their new values
617   ///                    (for values recalculated in the new ScoP, but not
618   ///                     within this basic block), one for each lane.
619   /// @param Schedule    A map from the statement to a schedule where the
620   ///                    innermost dimension is the dimension of the innermost
621   ///                    loop containing the statement.
622   /// @param NewAccesses A map from memory access ids to new ast expressions,
623   ///                    which may contain new access expressions for certain
624   ///                    memory accesses.
generate(BlockGenerator & BlockGen,ScopStmt & Stmt,std::vector<LoopToScevMapT> & VLTS,__isl_keep isl_map * Schedule,__isl_keep isl_id_to_ast_expr * NewAccesses)625   static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt,
626                        std::vector<LoopToScevMapT> &VLTS,
627                        __isl_keep isl_map *Schedule,
628                        __isl_keep isl_id_to_ast_expr *NewAccesses) {
629     VectorBlockGenerator Generator(BlockGen, VLTS, Schedule);
630     Generator.copyStmt(Stmt, NewAccesses);
631   }
632 
633 private:
634   // This is a vector of loop->scev maps.  The first map is used for the first
635   // vector lane, ...
636   // Each map, contains information about Instructions in the old ScoP, which
637   // are recalculated in the new SCoP. When copying the basic block, we replace
638   // all references to the old instructions with their recalculated values.
639   //
640   // For example, when the code generator produces this AST:
641   //
642   //   for (int c1 = 0; c1 <= 1023; c1 += 1)
643   //     for (int c2 = 0; c2 <= 1023; c2 += VF)
644   //       for (int lane = 0; lane <= VF; lane += 1)
645   //         Stmt(c2 + lane + 3, c1);
646   //
647   // VLTS[lane] contains a map:
648   //   "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"),
649   //   "inner loop in the old loop nest" -> SCEV("c1").
650   std::vector<LoopToScevMapT> &VLTS;
651 
652   // A map from the statement to a schedule where the innermost dimension is the
653   // dimension of the innermost loop containing the statement.
654   isl_map *Schedule;
655 
656   VectorBlockGenerator(BlockGenerator &BlockGen,
657                        std::vector<LoopToScevMapT> &VLTS,
658                        __isl_keep isl_map *Schedule);
659 
660   int getVectorWidth();
661 
662   Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap,
663                         VectorValueMapT &ScalarMaps, Loop *L);
664 
665   Type *getVectorPtrTy(const Value *V, int Width);
666 
667   /// Load a vector from a set of adjacent scalars
668   ///
669   /// In case a set of scalars is known to be next to each other in memory,
670   /// create a vector load that loads those scalars
671   ///
672   /// %vector_ptr= bitcast double* %p to <4 x double>*
673   /// %vec_full = load <4 x double>* %vector_ptr
674   ///
675   /// @param Stmt           The statement to code generate.
676   /// @param NegativeStride This is used to indicate a -1 stride. In such
677   ///                       a case we load the end of a base address and
678   ///                       shuffle the accesses in reverse order into the
679   ///                       vector. By default we would do only positive
680   ///                       strides.
681   ///
682   /// @param NewAccesses    A map from memory access ids to new ast
683   ///                       expressions, which may contain new access
684   ///                       expressions for certain memory accesses.
685   Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load,
686                                VectorValueMapT &ScalarMaps,
687                                __isl_keep isl_id_to_ast_expr *NewAccesses,
688                                bool NegativeStride);
689 
690   /// Load a vector initialized from a single scalar in memory
691   ///
692   /// In case all elements of a vector are initialized to the same
693   /// scalar value, this value is loaded and shuffled into all elements
694   /// of the vector.
695   ///
696   /// %splat_one = load <1 x double>* %p
697   /// %splat = shufflevector <1 x double> %splat_one, <1 x
698   ///       double> %splat_one, <4 x i32> zeroinitializer
699   ///
700   /// @param NewAccesses A map from memory access ids to new ast expressions,
701   ///                    which may contain new access expressions for certain
702   ///                    memory accesses.
703   Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load,
704                                 ValueMapT &BBMap,
705                                 __isl_keep isl_id_to_ast_expr *NewAccesses);
706 
707   /// Load a vector from scalars distributed in memory
708   ///
709   /// In case some scalars a distributed randomly in memory. Create a vector
710   /// by loading each scalar and by inserting one after the other into the
711   /// vector.
712   ///
713   /// %scalar_1= load double* %p_1
714   /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0
715   /// %scalar 2 = load double* %p_2
716   /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1
717   ///
718   /// @param NewAccesses A map from memory access ids to new ast expressions,
719   ///                    which may contain new access expressions for certain
720   ///                    memory accesses.
721   Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load,
722                                    VectorValueMapT &ScalarMaps,
723                                    __isl_keep isl_id_to_ast_expr *NewAccesses);
724 
725   /// @param NewAccesses A map from memory access ids to new ast expressions,
726   ///                    which may contain new access expressions for certain
727   ///                    memory accesses.
728   void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
729                     VectorValueMapT &ScalarMaps,
730                     __isl_keep isl_id_to_ast_expr *NewAccesses);
731 
732   void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
733                      ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
734 
735   void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
736                       ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
737 
738   /// @param NewAccesses A map from memory access ids to new ast expressions,
739   ///                    which may contain new access expressions for certain
740   ///                    memory accesses.
741   void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
742                  VectorValueMapT &ScalarMaps,
743                  __isl_keep isl_id_to_ast_expr *NewAccesses);
744 
745   /// @param NewAccesses A map from memory access ids to new ast expressions,
746   ///                    which may contain new access expressions for certain
747   ///                    memory accesses.
748   void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst,
749                           ValueMapT &VectorMap, VectorValueMapT &ScalarMaps,
750                           __isl_keep isl_id_to_ast_expr *NewAccesses);
751 
752   bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
753                            VectorValueMapT &ScalarMaps);
754 
755   bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
756 
757   /// Generate vector loads for scalars.
758   ///
759   /// @param Stmt           The scop statement for which to generate the loads.
760   /// @param VectorBlockMap A map that will be updated to relate the original
761   ///                       values with the newly generated vector loads.
762   void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap);
763 
764   /// Verify absence of scalar stores.
765   ///
766   /// @param Stmt The scop statement to check for scalar stores.
767   void verifyNoScalarStores(ScopStmt &Stmt);
768 
769   /// @param NewAccesses A map from memory access ids to new ast expressions,
770   ///                    which may contain new access expressions for certain
771   ///                    memory accesses.
772   void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
773                        VectorValueMapT &ScalarMaps,
774                        __isl_keep isl_id_to_ast_expr *NewAccesses);
775 
776   /// @param NewAccesses A map from memory access ids to new ast expressions,
777   ///                    which may contain new access expressions for certain
778   ///                    memory accesses.
779   void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses);
780 };
781 
782 /// Generator for new versions of polyhedral region statements.
783 class RegionGenerator : public BlockGenerator {
784 public:
785   /// Create a generator for regions.
786   ///
787   /// @param BlockGen A generator for basic blocks.
RegionGenerator(BlockGenerator & BlockGen)788   RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {}
789 
~RegionGenerator()790   virtual ~RegionGenerator() {}
791 
792   /// Copy the region statement @p Stmt.
793   ///
794   /// This copies the entire region represented by @p Stmt and updates
795   /// references to old values with references to new values, as defined by
796   /// GlobalMap.
797   ///
798   /// @param Stmt      The statement to code generate.
799   /// @param LTS       A map from old loops to new induction variables as SCEVs.
800   void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
801                 __isl_keep isl_id_to_ast_expr *IdToAstExp);
802 
803 private:
804   /// A map from old to the first new block in the region, that was created to
805   /// model the old basic block.
806   DenseMap<BasicBlock *, BasicBlock *> StartBlockMap;
807 
808   /// A map from old to the last new block in the region, that was created to
809   /// model the old basic block.
810   DenseMap<BasicBlock *, BasicBlock *> EndBlockMap;
811 
812   /// The "BBMaps" for the whole region (one for each block). In case a basic
813   /// block is code generated to multiple basic blocks (e.g., for partial
814   /// writes), the StartBasic is used as index for the RegionMap.
815   DenseMap<BasicBlock *, ValueMapT> RegionMaps;
816 
817   /// Mapping to remember PHI nodes that still need incoming values.
818   using PHINodePairTy = std::pair<PHINode *, PHINode *>;
819   DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap;
820 
821   /// Repair the dominance tree after we created a copy block for @p BB.
822   ///
823   /// @returns The immediate dominator in the DT for @p BBCopy if in the region.
824   BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy);
825 
826   /// Add the new operand from the copy of @p IncomingBB to @p PHICopy.
827   ///
828   /// PHI nodes, which may have (multiple) edges that enter from outside the
829   /// non-affine subregion and even from outside the scop, are code generated as
830   /// follows:
831   ///
832   /// # Original
833   ///
834   ///   Region: %A-> %exit
835   ///   NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC)
836   ///
837   ///     pre:
838   ///       %val = add i64 1, 1
839   ///
840   ///     A:
841   ///      br label %nonaff
842   ///
843   ///     nonaffB:
844   ///       %phi = phi i64 [%val, %A], [%valC, %nonAffC], [%valD, %D]
845   ///       %cmp = <nonaff>
846   ///       br i1 %cmp, label %C, label %nonaffC
847   ///
848   ///     nonaffC:
849   ///       %valC = add i64 1, 1
850   ///       br i1 undef, label %D, label %nonaffB
851   ///
852   ///     D:
853   ///       %valD = ...
854   ///       %exit_cond = <loopexit>
855   ///       br i1 %exit_cond, label %nonaffB, label %exit
856   ///
857   ///     exit:
858   ///       ...
859   ///
860   ///  - %start and %C enter from outside the non-affine region.
861   ///  - %nonaffC enters from within the non-affine region.
862   ///
863   ///  # New
864   ///
865   ///    polly.A:
866   ///       store i64 %val, i64* %phi.phiops
867   ///       br label %polly.nonaffA.entry
868   ///
869   ///    polly.nonaffB.entry:
870   ///       %phi.phiops.reload = load i64, i64* %phi.phiops
871   ///       br label %nonaffB
872   ///
873   ///    polly.nonaffB:
874   ///       %polly.phi = [%phi.phiops.reload, %nonaffB.entry],
875   ///                    [%p.valC, %polly.nonaffC]
876   ///
877   ///    polly.nonaffC:
878   ///       %p.valC = add i64 1, 1
879   ///       br i1 undef, label %polly.D, label %polly.nonaffB
880   ///
881   ///    polly.D:
882   ///        %p.valD = ...
883   ///        store i64 %p.valD, i64* %phi.phiops
884   ///        %p.exit_cond = <loopexit>
885   ///        br i1 %p.exit_cond, label %polly.nonaffB, label %exit
886   ///
887   /// Values that enter the PHI from outside the non-affine region are stored
888   /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and
889   /// reloaded in %polly.nonaffB.entry, a basic block generated before the
890   /// actual non-affine region.
891   ///
892   /// When generating the PHI node of the non-affine region in %polly.nonaffB,
893   /// incoming edges from outside the region are combined into a single branch
894   /// from %polly.nonaffB.entry which has as incoming value the value reloaded
895   /// from the %phi.phiops stack slot. Incoming edges from within the region
896   /// refer to the copied instructions (%p.valC) and basic blocks
897   /// (%polly.nonaffC) of the non-affine region.
898   ///
899   /// @param Stmt       The statement to code generate.
900   /// @param PHI        The original PHI we copy.
901   /// @param PHICopy    The copy of @p PHI.
902   /// @param IncomingBB An incoming block of @p PHI.
903   /// @param LTS        A map from old loops to new induction variables as
904   /// SCEVs.
905   void addOperandToPHI(ScopStmt &Stmt, PHINode *PHI, PHINode *PHICopy,
906                        BasicBlock *IncomingBB, LoopToScevMapT &LTS);
907 
908   /// Create a PHI that combines the incoming values from all incoming blocks
909   /// that are in the subregion.
910   ///
911   /// PHIs in the subregion's exit block can have incoming edges from within and
912   /// outside the subregion. This function combines the incoming values from
913   /// within the subregion to appear as if there is only one incoming edge from
914   /// the subregion (an additional exit block is created by RegionGenerator).
915   /// This is to avoid that a value is written to the .phiops location without
916   /// leaving the subregion because the exiting block as an edge back into the
917   /// subregion.
918   ///
919   /// @param MA    The WRITE of MemoryKind::PHI/MemoryKind::ExitPHI for a PHI in
920   ///              the subregion's exit block.
921   /// @param LTS   Virtual induction variable mapping.
922   /// @param BBMap A mapping from old values to their new values in this block.
923   /// @param L     Loop surrounding this region statement.
924   ///
925   /// @returns The constructed PHI node.
926   PHINode *buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap,
927                         Loop *L);
928 
929   /// @param Return the new value of a scalar write, creating a PHINode if
930   ///        necessary.
931   ///
932   /// @param MA    A scalar WRITE MemoryAccess.
933   /// @param LTS   Virtual induction variable mapping.
934   /// @param BBMap A mapping from old values to their new values in this block.
935   ///
936   /// @returns The effective value of @p MA's written value when leaving the
937   ///          subregion.
938   /// @see buildExitPHI
939   Value *getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap);
940 
941   /// Generate the scalar stores for the given statement.
942   ///
943   /// After the statement @p Stmt was copied all inner-SCoP scalar dependences
944   /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
945   /// be demoted to memory.
946   ///
947   /// @param Stmt  The statement we generate code for.
948   /// @param LTS   A mapping from loops virtual canonical induction variable to
949   ///              their new values (for values recalculated in the new ScoP,
950   ///              but not within this basic block)
951   /// @param BBMap A mapping from old values to their new values in this block.
952   /// @param LTS   A mapping from loops virtual canonical induction variable to
953   /// their new values.
954   virtual void
955   generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMAp,
956                        __isl_keep isl_id_to_ast_expr *NewAccesses) override;
957 
958   /// Copy a single PHI instruction.
959   ///
960   /// This copies a single PHI instruction and updates references to old values
961   /// with references to new values, as defined by GlobalMap and BBMap.
962   ///
963   /// @param Stmt      The statement to code generate.
964   /// @param PHI       The PHI instruction to copy.
965   /// @param BBMap     A mapping from old values to their new values
966   ///                  (for values recalculated within this basic block).
967   /// @param LTS       A map from old loops to new induction variables as SCEVs.
968   virtual void copyPHIInstruction(ScopStmt &Stmt, PHINode *Inst,
969                                   ValueMapT &BBMap,
970                                   LoopToScevMapT &LTS) override;
971 };
972 } // namespace polly
973 #endif
974