//===-BlockGenerators.h - Helper to generate code for statements-*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the BlockGenerator and VectorBlockGenerator classes, which
// generate sequential code and vectorized code for a polyhedral statement,
// respectively.
//
//===----------------------------------------------------------------------===//

#ifndef POLLY_BLOCK_GENERATORS_H
#define POLLY_BLOCK_GENERATORS_H

#include "polly/CodeGen/IRBuilder.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "isl/isl-noexceptions.h"

namespace polly {
using llvm::AllocaInst;
using llvm::ArrayRef;
using llvm::AssertingVH;
using llvm::BasicBlock;
using llvm::BinaryOperator;
using llvm::CmpInst;
using llvm::DataLayout;
using llvm::DenseMap;
using llvm::DominatorTree;
using llvm::Function;
using llvm::Instruction;
using llvm::LoadInst;
using llvm::Loop;
using llvm::LoopInfo;
using llvm::LoopToScevMapT;
using llvm::MapVector;
using llvm::PHINode;
using llvm::ScalarEvolution;
using llvm::SetVector;
using llvm::SmallVector;
using llvm::StoreInst;
using llvm::StringRef;
using llvm::Type;
using llvm::UnaryInstruction;
using llvm::Value;

class MemoryAccess;
class ScopArrayInfo;
class IslExprBuilder;

/// Generate a new basic block for a polyhedral statement.
class BlockGenerator {
public:
  typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT;

  /// Map types to resolve scalar dependences.
  ///
  ///@{
  using AllocaMapTy = DenseMap<const ScopArrayInfo *, AssertingVH<AllocaInst>>;

  /// Simple vector of instructions to store escape users.
  using EscapeUserVectorTy = SmallVector<Instruction *, 4>;

  /// Map type to resolve escaping users for scalar instructions.
  ///
  /// @see The EscapeMap member.
  using EscapeUsersAllocaMapTy =
      MapVector<Instruction *,
                std::pair<AssertingVH<Value>, EscapeUserVectorTy>>;

  ///@}
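
  // Illustration only (hypothetical values): an instruction %v that is defined
  // inside the SCoP but still used by %use1 and %use2 after the SCoP would be
  // tracked roughly as
  //
  //   EscapeMap[%v] = { <alloca through which %v escapes>, { %use1, %use2 } };
  //
  // i.e. the value is paired with the stack slot used to communicate it out of
  // the SCoP and with the list of its out-of-SCoP users.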

  /// Create a generator for basic blocks.
  ///
  /// @param Builder     The LLVM-IR Builder used to generate the statement. The
  ///                    code is generated at the location the Builder points
  ///                    to.
  /// @param LI          The loop info for the current function.
  /// @param SE          The scalar evolution info for the current function.
  /// @param DT          The dominator tree of this function.
  /// @param ScalarMap   Map from scalars to their demoted location.
  /// @param EscapeMap   Map from scalars to their escape users and locations.
  /// @param GlobalMap   A mapping from llvm::Values used in the original scop
  ///                    region to a new set of llvm::Values. Each reference to
  ///                    an original value appearing in this mapping is replaced
  ///                    with the new value it is mapped to.
  /// @param ExprBuilder An expression builder to generate new access functions.
  /// @param StartBlock  The first basic block after the RTC.
  BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE,
                 DominatorTree &DT, AllocaMapTy &ScalarMap,
                 EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap,
                 IslExprBuilder *ExprBuilder, BasicBlock *StartBlock);
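
  // A minimal usage sketch (hypothetical caller; the analyses and maps passed
  // in are assumed to be set up by Polly's code-generation pass):
  //
  //   BlockGenerator BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap,
  //                           GlobalMap, &ExprBuilder, StartBlock);
  //   BlockGen.copyStmt(Stmt, LTS, NewAccesses); // once per block statement
  //   BlockGen.finalizeSCoP(S);                  // after all statements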

  /// Copy the basic block.
  ///
  /// This copies the entire basic block and updates references to old values
  /// with references to new values, as defined by GlobalMap.
  ///
  /// @param Stmt        The block statement to code generate.
  /// @param LTS         A map from old loops to new induction variables as
  ///                    SCEVs.
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                isl_id_to_ast_expr *NewAccesses);

  /// Remove a ScopArrayInfo's allocation from the ScalarMap.
  ///
  /// This function allows removing values from the ScalarMap. This is useful
  /// if the corresponding alloca instruction will be deleted (or moved into
  /// another module), as without removing these values the underlying
  /// AssertingVH will trigger because we would still keep a reference to this
  /// scalar.
  ///
  /// @param Array The array for which the alloca was generated.
  void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); }

  /// Return the alloca for @p Access.
  ///
  /// If no alloca was mapped for @p Access a new one is created.
  ///
  /// @param Access    The memory access for which to generate the alloca.
  ///
  /// @returns The alloca for @p Access or a replacement value taken from
  ///          GlobalMap.
  Value *getOrCreateAlloca(const MemoryAccess &Access);

  /// Return the alloca for @p Array.
  ///
  /// If no alloca was mapped for @p Array a new one is created.
  ///
  /// @param Array The array for which to generate the alloca.
  ///
  /// @returns The alloca for @p Array or a replacement value taken from
  ///          GlobalMap.
  Value *getOrCreateAlloca(const ScopArrayInfo *Array);

  /// Finalize the code generation for the SCoP @p S.
  ///
  /// This will initialize and finalize the scalar variables we demoted during
  /// the code generation.
  ///
  /// @see createScalarInitialization(Scop &)
  /// @see createScalarFinalization(Region &)
  void finalizeSCoP(Scop &S);

  /// An empty destructor.
  virtual ~BlockGenerator() {}

  BlockGenerator(const BlockGenerator &) = default;

protected:
  PollyIRBuilder &Builder;
  LoopInfo &LI;
  ScalarEvolution &SE;
  IslExprBuilder *ExprBuilder;

  /// The dominator tree of this function.
  DominatorTree &DT;

  /// The entry block of the current function.
  BasicBlock *EntryBB;

  /// Map to resolve scalar dependences for PHI operands and scalars.
  ///
  /// When translating code that contains inter-block scalar dependences
  /// (including the use of data carrying PHI nodes), we do not directly
  /// regenerate in-register SSA code, but instead allocate some stack memory
  /// through which these scalar values are passed. Only a later pass of
  /// -mem2reg will then (re)introduce in-register computations.
  ///
  /// To keep track of the memory location(s) used to store the data computed by
  /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a
  /// given ScopArrayInfo to the chunk of stack-allocated memory that is
  /// used for code generation.
  ///
  /// Up to two different ScopArrayInfo objects are associated with each
  /// llvm::Value:
  ///
  /// MemoryType::Value objects are used for normal scalar dependences that go
  /// from a scalar definition to its use. Such dependences are lowered by
  /// directly writing the value an instruction computes into the corresponding
  /// chunk of memory and reading it back from this chunk of memory right before
  /// every use of this original scalar value. The memory allocations for
  /// MemoryType::Value objects end with '.s2a'.
  ///
  /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI
  /// nodes. For each PHI node we introduce, besides the Array of type
  /// MemoryType::Value, a second chunk of memory into which we write, at the
  /// end of each basic block preceding the PHI instruction, the value passed
  /// through this basic block. At the place where the PHI node is executed, we
  /// replace the PHI node with a load from the corresponding MemoryType::PHI
  /// memory location. The memory allocations for MemoryType::PHI end with
  /// '.phiops'.
  ///
  /// Example:
  ///
  ///                              Input C Code
  ///                              ============
  ///
  ///                 S1:      x1 = ...
  ///                          for (i=0...N) {
  ///                 S2:           x2 = phi(x1, add)
  ///                 S3:           add = x2 + 42;
  ///                          }
  ///                 S4:      print(x1)
  ///                          print(x2)
  ///                          print(add)
  ///
  ///
  ///        Unmodified IR                         IR After expansion
  ///        =============                         ==================
  ///
  /// S1:   x1 = ...                     S1:    x1 = ...
  ///                                           x1.s2a = x1
  ///                                           x2.phiops = x1
  ///        |                                    |
  ///        |   <--<--<--<--<                    |   <--<--<--<--<
  ///        | /              \                   | /              \     .
  ///        V V               \                  V V               \    .
  /// S2:  x2 = phi (x1, add)   |        S2:    x2 = x2.phiops       |
  ///                           |               x2.s2a = x2          |
  ///                           |                                    |
  /// S3:  add = x2 + 42        |        S3:    add = x2 + 42        |
  ///                           |               add.s2a = add        |
  ///                           |               x2.phiops = add      |
  ///        | \               /                  | \               /
  ///        |  \             /                   |  \             /
  ///        |   >-->-->-->-->                    |   >-->-->-->-->
  ///        V                                    V
  ///
  ///                                    S4:    x1 = x1.s2a
  /// S4:  ... = x1                             ... = x1
  ///                                           x2 = x2.s2a
  ///      ... = x2                             ... = x2
  ///                                           add = add.s2a
  ///      ... = add                            ... = add
  ///
  ///      ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a,
  ///                    add:Value -> add.s2a, x2:PHI -> x2.phiops }
  ///
  ///  ??? Why does a PHI-node require two memory chunks ???
  ///
  ///  One may wonder why a PHI node requires two memory chunks instead of
  ///  storing all data in a single location. The following example tries
  ///  to store all data in .s2a and drops the .phiops location:
  ///
  ///      S1:    x1 = ...
  ///             x1.s2a = x1
  ///             x2.s2a = x1             // use .s2a instead of .phiops
  ///               |
  ///               |   <--<--<--<--<
  ///               | /              \    .
  ///               V V               \   .
  ///      S2:    x2 = x2.s2a          |  // value is same as above, but read
  ///                                  |  // from .s2a
  ///                                  |
  ///             x2.s2a = x2          |  // store into .s2a as normal
  ///                                  |
  ///      S3:    add = x2 + 42        |
  ///             add.s2a = add        |
  ///             x2.s2a = add         |  // use .s2a instead of .phiops
  ///               | \               /   // !!! This is wrong, as x2.s2a now
  ///               |   >-->-->-->-->     // contains add instead of x2.
  ///               V
  ///
  ///      S4:    x1 = x1.s2a
  ///             ... = x1
  ///             x2 = x2.s2a             // !!! We now read 'add' instead of
  ///             ... = x2                // 'x2'
  ///             add = add.s2a
  ///             ... = add
  ///
  ///  As visible in the example, the SSA value of the PHI node may still be
  ///  needed _after_ the basic block, which could conceptually branch to the
  ///  PHI node, has been run and has overwritten the PHI's old value. Hence, a
  ///  single memory location is not enough to code-generate a PHI node.
  ///
  /// Memory locations used for the scalar and PHI-node modeling described
  /// above.
  AllocaMapTy &ScalarMap;

  /// Map from instructions to their escape users as well as the alloca.
  EscapeUsersAllocaMapTy &EscapeMap;

  /// A map from llvm::Values referenced in the old code to a new set of
  /// llvm::Values, which is used to replace these old values during
  /// code generation.
  ValueMapT &GlobalMap;

  /// The first basic block after the RTC.
  BasicBlock *StartBlock;

  /// Split @p BB to create a new one we can use to clone @p BB in.
  BasicBlock *splitBB(BasicBlock *BB);

  /// Copy the given basic block.
  ///
  /// @param Stmt        The statement to code generate.
  /// @param BB          The basic block to code generate.
  /// @param BBMap       A mapping from old values to their new values in this
  ///                    block.
  /// @param LTS         A map from old loops to new induction variables as
  ///                    SCEVs.
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  ///
  /// @returns The copy of the basic block.
  BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap,
                     LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);

  /// Copy the given basic block.
  ///
  /// @param Stmt        The statement to code generate.
  /// @param BB          The basic block to code generate.
  /// @param BBCopy      The new basic block to generate code in.
  /// @param BBMap       A mapping from old values to their new values in this
  ///                    block.
  /// @param LTS         A map from old loops to new induction variables as
  ///                    SCEVs.
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy,
              ValueMapT &BBMap, LoopToScevMapT &LTS,
              isl_id_to_ast_expr *NewAccesses);

  /// Generate reloads of scalars demoted to memory and needed by @p Stmt.
  ///
  /// @param Stmt  The statement we generate code for.
  /// @param LTS   A mapping from loops' virtual canonical induction
  ///              variables to their new values.
  /// @param BBMap A mapping from old values to their new values in this block.
  /// @param NewAccesses A map from memory access ids to new ast expressions.
  void generateScalarLoads(ScopStmt &Stmt, LoopToScevMapT &LTS,
                           ValueMapT &BBMap,
                           __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// When statement tracing is enabled, build the print instructions for
  /// printing the current statement instance.
  ///
  /// The printed output looks like:
  ///
  ///     Stmt1(0)
  ///
  /// If printing of scalars is enabled, it also appends the value of each
  /// scalar to the line:
  ///
  ///     Stmt1(0) %i=1 %sum=5
  ///
  /// @param Stmt  The statement we generate code for.
  /// @param LTS   A mapping from loops' virtual canonical induction
  ///              variables to their new values.
  /// @param BBMap A mapping from old values to their new values in this block.
  void generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              ValueMapT &BBMap);

  /// Generate instructions that compute whether one instance of @p Subdomain
  /// is executed.
  ///
  /// @param Stmt      The statement we generate code for.
  /// @param Subdomain A set in the space of @p Stmt's domain. Elements not in
  ///                  @p Stmt's domain are ignored.
  ///
  /// @return An expression of type i1, generated into the current builder
  ///         position, that evaluates to 1 if the executed instance is part of
  ///         @p Subdomain.
  Value *buildContainsCondition(ScopStmt &Stmt, const isl::set &Subdomain);

  /// Generate code that executes in a subset of @p Stmt's domain.
  ///
  /// @param Stmt        The statement we generate code for.
  /// @param Subdomain   The condition for some code to be executed.
  /// @param Subject     A name for the code that is executed
  ///                    conditionally. Used to name new basic blocks and
  ///                    instructions.
  /// @param GenThenFunc Callback which generates the code to be executed
  ///                    when the current executed instance is in @p Subdomain.
  ///                    The IRBuilder's position is moved to within the block
  ///                    that executes conditionally for this callback.
  void generateConditionalExecution(ScopStmt &Stmt, const isl::set &Subdomain,
                                    StringRef Subject,
                                    const std::function<void()> &GenThenFunc);
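
  // Illustration only (a hypothetical caller; `AccessDom`, `Val` and `Addr`
  // are made-up names): a partial write whose access relation covers only a
  // subset of the statement domain could be guarded as
  //
  //   generateConditionalExecution(Stmt, AccessDom, "polly.partial.write",
  //                                [&]() { Builder.CreateStore(Val, Addr); });
  //
  // The store is then only emitted into the conditionally executed block.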

  /// Generate the scalar stores for the given statement.
  ///
  /// After the statement @p Stmt was copied, all inner-SCoP scalar dependences
  /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
  /// be demoted to memory.
  ///
  /// @param Stmt  The statement we generate code for.
  /// @param LTS   A mapping from loops' virtual canonical induction
  ///              variables to their new values
  ///              (for values recalculated in the new ScoP, but not
  ///               within this basic block).
  /// @param BBMap A mapping from old values to their new values in this block.
  /// @param NewAccesses A map from memory access ids to new ast expressions.
  virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                    ValueMapT &BBMap,
                                    __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// Handle users of @p Array outside the SCoP.
  ///
  /// @param S         The current SCoP.
  /// @param Array     The ScopArrayInfo to handle.
  void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array);

  /// Find scalar statements that have outside users.
  ///
  /// We register these scalar values to later update subsequent scalar uses of
  /// these values to either use the newly computed value from within the scop
  /// (if the scop was executed) or the unchanged original code (if the run-time
  /// check failed).
  ///
  /// @param S The scop for which to find the outside users.
  void findOutsideUsers(Scop &S);

  /// Initialize the memory of demoted scalars.
  ///
  /// @param S The scop for which to generate the scalar initializers.
  void createScalarInitialization(Scop &S);

  /// Create exit PHI node merges for PHI nodes with more than two edges
  /// from inside the scop.
  ///
  /// For scops which have a PHI node in the exit block that has more than two
  /// incoming edges from inside the scop region, we require some special
  /// handling to understand which of the possible values will be passed to the
  /// PHI node from inside the optimized version of the scop. To do so ScopInfo
  /// models the possible incoming values as write accesses of the ScopStmts.
  ///
  /// This function creates corresponding code to reload the computed outgoing
  /// value from the stack slot it has been stored into and to pass it on to the
  /// PHI node in the original exit block.
  ///
  /// @param S The scop for which to generate the exiting PHI nodes.
  void createExitPHINodeMerges(Scop &S);

  /// Promote the values of demoted scalars after the SCoP.
  ///
  /// If a scalar value was used outside the SCoP, we need to promote the value
  /// stored in the memory cell allocated for that scalar and combine it with
  /// the original value in the non-optimized SCoP.
  void createScalarFinalization(Scop &S);

  /// Try to synthesize a new value.
  ///
  /// Given an old value, we try to synthesize it in a new context from its
  /// original SCEV expression. We start from the original SCEV expression,
  /// then replace outdated parameter and loop references, and finally
  /// expand it to code that computes this updated expression.
  ///
  /// @param Stmt      The statement to code generate.
  /// @param Old       The old Value.
  /// @param BBMap     A mapping from old values to their new values
  ///                  (for values recalculated within this basic block).
  /// @param LTS       A mapping from loops' virtual canonical induction
  ///                  variables to their new values
  ///                  (for values recalculated in the new ScoP, but not
  ///                   within this basic block).
  /// @param L         The loop that surrounded the instruction that referenced
  ///                  this value in the original code. This loop is used to
  ///                  evaluate the scalar evolution at the right scope.
  ///
  /// @returns  o A newly synthesized value.
  ///           o NULL, if synthesizing the value failed.
  Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                               LoopToScevMapT &LTS, Loop *L) const;

  /// Get the new version of a value.
  ///
  /// Given an old value, we first check if a new version of this value is
  /// available in the BBMap or GlobalMap. In case it is not and the value can
  /// be recomputed using SCEV, we do so. If we cannot recompute a value
  /// using SCEV, but we understand that the value is constant within the scop,
  /// we return the old value. If the value can still not be derived, this
  /// function will assert.
  ///
  /// @param Stmt      The statement to code generate.
  /// @param Old       The old Value.
  /// @param BBMap     A mapping from old values to their new values
  ///                  (for values recalculated within this basic block).
  /// @param LTS       A mapping from loops' virtual canonical induction
  ///                  variables to their new values
  ///                  (for values recalculated in the new ScoP, but not
  ///                   within this basic block).
  /// @param L         The loop that surrounded the instruction that referenced
  ///                  this value in the original code. This loop is used to
  ///                  evaluate the scalar evolution at the right scope.
  ///
  /// @returns  o The old value, if it is still valid.
  ///           o The new value, if available.
  ///           o NULL, if no value is found.
  Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                     LoopToScevMapT &LTS, Loop *L) const;
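
  // Roughly, the lookup order described above corresponds to (a sketch, not
  // the exact implementation):
  //
  //   if (Value *New = BBMap.lookup(Old))      return New;
  //   if (Value *New = GlobalMap.lookup(Old))  return New;
  //   if (Value *New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L))
  //     return New;
  //   return Old; // assumed to be unchanged/invariant within the SCoP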

  void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
                      LoopToScevMapT &LTS);

  /// Get the innermost loop that surrounds the statement @p Stmt.
  Loop *getLoopForStmt(const ScopStmt &Stmt) const;

  /// Generate the operand address.
  ///
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  Value *generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
                                  ValueMapT &BBMap, LoopToScevMapT &LTS,
                                  isl_id_to_ast_expr *NewAccesses);

  /// Generate the operand address.
  ///
  /// @param Stmt         The statement to generate code for.
  /// @param L            The innermost loop that surrounds the statement.
  /// @param Pointer      If the access expression is not changed (i.e., not
  ///                     found in @p LTS), use this Pointer from the original
  ///                     code instead.
  /// @param BBMap        A mapping from old values to their new values.
  /// @param LTS          A mapping from loops' virtual canonical induction
  ///                     variables to their new values.
  /// @param NewAccesses  Ahead-of-time generated access expressions.
  /// @param Id           Identifier of the MemoryAccess to generate.
  /// @param ExpectedType The type the returned value should have.
  ///
  /// @return The generated address.
  Value *generateLocationAccessed(ScopStmt &Stmt, Loop *L, Value *Pointer,
                                  ValueMapT &BBMap, LoopToScevMapT &LTS,
                                  isl_id_to_ast_expr *NewAccesses,
                                  __isl_take isl_id *Id, Type *ExpectedType);

  /// Generate the pointer value that is accessed by @p Access.
  ///
  /// For write accesses, generate the target address. For read accesses,
  /// generate the source address.
  /// The access can be either an array access or a scalar access. In the first
  /// case, the returned address will point to an element of that array. In
  /// the scalar case, an alloca is used.
  /// If a new AccessRelation is set for the MemoryAccess, the new relation will
  /// be used.
  ///
  /// @param Access      The access to generate a pointer for.
  /// @param L           The innermost loop that surrounds the statement.
  /// @param LTS         A mapping from loops' virtual canonical induction
  ///                    variables to their new values.
  /// @param BBMap       A mapping from old values to their new values.
  /// @param NewAccesses A map from memory access ids to new ast expressions.
  ///
  /// @return The generated address.
  Value *getImplicitAddress(MemoryAccess &Access, Loop *L, LoopToScevMapT &LTS,
                            ValueMapT &BBMap,
                            __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  Value *generateArrayLoad(ScopStmt &Stmt, LoadInst *load, ValueMapT &BBMap,
                           LoopToScevMapT &LTS,
                           isl_id_to_ast_expr *NewAccesses);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void generateArrayStore(ScopStmt &Stmt, StoreInst *store, ValueMapT &BBMap,
                          LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);

  /// Copy a single PHI instruction.
  ///
  /// The implementation in the BlockGenerator is trivial; however, it allows
  /// subclasses to handle PHIs differently.
  virtual void copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &,
                                  LoopToScevMapT &) {}

  /// Copy a single Instruction.
  ///
  /// This copies a single Instruction and updates references to old values
  /// with references to new values, as defined by GlobalMap and BBMap.
  ///
  /// @param Stmt        The statement to code generate.
  /// @param Inst        The instruction to copy.
  /// @param BBMap       A mapping from old values to their new values
  ///                    (for values recalculated within this basic block).
  /// @param LTS         A mapping from loops' virtual canonical induction
  ///                    variables to their new values
  ///                    (for values recalculated in the new ScoP, but not
  ///                     within this basic block).
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
                       LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);

  /// Helper to determine if @p Inst can be synthesized in @p Stmt.
  ///
  /// @returns true, iff @p Inst can be synthesized in @p Stmt.
  bool canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst);

  /// Remove dead instructions generated for BB.
  ///
  /// @param BB The basic block for which code has been generated.
  /// @param BBMap A local map from old to new instructions.
  void removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap);

  /// Invalidate the scalar evolution expressions for a scop.
  ///
  /// This function invalidates the scalar evolution results for all
  /// instructions that are part of a given scop, and the loops
  /// surrounding the users of merge blocks. This is necessary to ensure that
  /// later scops do not obtain scalar evolution expressions that reference
  /// values that earlier dominated the later scop, but have been moved into
  /// the conditional part of an earlier scop and consequently no longer
  /// dominate the later scop.
  ///
  /// @param S The scop to invalidate.
  void invalidateScalarEvolution(Scop &S);
};

/// Generate a new vector basic block for a polyhedral statement.
///
/// The only public function exposed is generate().
class VectorBlockGenerator : BlockGenerator {
public:
  /// Generate a new vector basic block for a ScopStmt.
  ///
  /// This code generation is similar to the normal, scalar code generation,
  /// except that each instruction is code generated for several vector lanes
  /// at a time. If possible, instructions are issued as actual vector
  /// instructions, but e.g. for address calculation instructions we currently
  /// generate scalar instructions for each vector lane.
  ///
  /// @param BlockGen    A block generator object used as parent.
  /// @param Stmt        The statement to code generate.
  /// @param VLTS        A mapping from loops' virtual canonical induction
  ///                    variables to their new values
  ///                    (for values recalculated in the new ScoP, but not
  ///                     within this basic block), one for each lane.
  /// @param Schedule    A map from the statement to a schedule where the
  ///                    innermost dimension is the dimension of the innermost
  ///                    loop containing the statement.
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt,
                       std::vector<LoopToScevMapT> &VLTS,
                       __isl_keep isl_map *Schedule,
                       __isl_keep isl_id_to_ast_expr *NewAccesses) {
    VectorBlockGenerator Generator(BlockGen, VLTS, Schedule);
    Generator.copyStmt(Stmt, NewAccesses);
  }
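
  // A caller-side sketch (hypothetical; VF and the way each lane's
  // LoopToScevMapT is filled are assumptions for illustration):
  //
  //   std::vector<LoopToScevMapT> VLTS(VF);
  //   // ... fill VLTS[Lane] with the per-lane induction variable values ...
  //   VectorBlockGenerator::generate(BlockGen, Stmt, VLTS, Schedule,
  //                                  NewAccesses);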

private:
  // This is a vector of loop->scev maps. The first map is used for the first
  // vector lane, ...
  // Each map contains information about Instructions in the old ScoP, which
  // are recalculated in the new SCoP. When copying the basic block, we replace
  // all references to the old instructions with their recalculated values.
  //
  // For example, when the code generator produces this AST:
  //
  //   for (int c1 = 0; c1 <= 1023; c1 += 1)
  //     for (int c2 = 0; c2 <= 1023; c2 += VF)
  //       for (int lane = 0; lane < VF; lane += 1)
  //         Stmt(c2 + lane + 3, c1);
  //
  // VLTS[lane] contains a map:
  //   "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"),
  //   "inner loop in the old loop nest" -> SCEV("c1").
  std::vector<LoopToScevMapT> &VLTS;

  // A map from the statement to a schedule where the innermost dimension is the
  // dimension of the innermost loop containing the statement.
  isl_map *Schedule;

  VectorBlockGenerator(BlockGenerator &BlockGen,
                       std::vector<LoopToScevMapT> &VLTS,
                       __isl_keep isl_map *Schedule);

  int getVectorWidth();

  Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap,
                        VectorValueMapT &ScalarMaps, Loop *L);

  /// Load a vector from a set of adjacent scalars.
  ///
  /// In case a set of scalars is known to be next to each other in memory,
  /// create a vector load that loads those scalars:
  ///
  /// %vector_ptr = bitcast double* %p to <4 x double>*
  /// %vec_full = load <4 x double>* %vector_ptr
  ///
  /// @param Stmt           The statement to code generate.
  /// @param NegativeStride This is used to indicate a -1 stride. In such
  ///                       a case we load the end of a base address and
  ///                       shuffle the accesses in reverse order into the
  ///                       vector. By default, only positive strides are
  ///                       handled.
  ///
  /// @param NewAccesses    A map from memory access ids to new ast
  ///                       expressions, which may contain new access
  ///                       expressions for certain memory accesses.
  Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load,
                               VectorValueMapT &ScalarMaps,
                               __isl_keep isl_id_to_ast_expr *NewAccesses,
                               bool NegativeStride);
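
  // Conceptually, for a -1 stride the elements are loaded as one vector
  // starting at the lowest accessed address and the lanes are then reversed.
  // A sketch (illustrative IR only, assuming a vector width of 4):
  //
  //   %vector_ptr = bitcast double* %p.lowest to <4 x double>*
  //   %vec.rev    = load <4 x double>* %vector_ptr
  //   %vec        = shufflevector <4 x double> %vec.rev, <4 x double> undef,
  //                               <4 x i32> <i32 3, i32 2, i32 1, i32 0>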

  /// Load a vector initialized from a single scalar in memory.
  ///
  /// In case all elements of a vector are initialized to the same
  /// scalar value, this value is loaded and shuffled into all elements
  /// of the vector.
  ///
  /// %splat_one = load <1 x double>* %p
  /// %splat = shufflevector <1 x double> %splat_one, <1 x
  ///       double> %splat_one, <4 x i32> zeroinitializer
  ///
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load,
                                ValueMapT &BBMap,
                                __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// Load a vector from scalars distributed in memory.
  ///
  /// In case the scalars are distributed arbitrarily in memory, create a
  /// vector by loading each scalar individually and inserting one after the
  /// other into the vector.
  ///
  /// %scalar_1 = load double* %p_1
  /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0
  /// %scalar_2 = load double* %p_2
  /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_2, i32 1
  ///
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load,
                                   VectorValueMapT &ScalarMaps,
                                   __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
                    VectorValueMapT &ScalarMaps,
                    __isl_keep isl_id_to_ast_expr *NewAccesses);

  void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                     ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);

  void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                      ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
                 VectorValueMapT &ScalarMaps,
                 __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst,
                          ValueMapT &VectorMap, VectorValueMapT &ScalarMaps,
                          __isl_keep isl_id_to_ast_expr *NewAccesses);

  bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
                           VectorValueMapT &ScalarMaps);

  bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);

  /// Generate vector loads for scalars.
  ///
  /// @param Stmt           The scop statement for which to generate the loads.
  /// @param VectorBlockMap A map that will be updated to relate the original
  ///                       values with the newly generated vector loads.
  void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap);

  /// Verify absence of scalar stores.
  ///
  /// @param Stmt The scop statement to check for scalar stores.
  void verifyNoScalarStores(ScopStmt &Stmt);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
                       VectorValueMapT &ScalarMaps,
                       __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses);
};

/// Generator for new versions of polyhedral region statements.
class RegionGenerator : public BlockGenerator {
public:
  /// Create a generator for regions.
  ///
  /// @param BlockGen A generator for basic blocks.
  RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {}

  virtual ~RegionGenerator() {}

  /// Copy the region statement @p Stmt.
  ///
  /// This copies the entire region represented by @p Stmt and updates
  /// references to old values with references to new values, as defined by
  /// GlobalMap.
  ///
  /// @param Stmt       The statement to code generate.
  /// @param LTS        A map from old loops to new induction variables as
  ///                   SCEVs.
  /// @param IdToAstExp A map from memory access ids to new ast expressions.
  void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                __isl_keep isl_id_to_ast_expr *IdToAstExp);

private:
  /// A map from each old basic block to the first new block in the region that
  /// was created to model it.
  DenseMap<BasicBlock *, BasicBlock *> StartBlockMap;

  /// A map from each old basic block to the last new block in the region that
  /// was created to model it.
  DenseMap<BasicBlock *, BasicBlock *> EndBlockMap;

  /// The "BBMaps" for the whole region (one for each block). In case a basic
  /// block is code generated to multiple basic blocks (e.g., for partial
  /// writes), the start block (as stored in StartBlockMap) is used as the
  /// index into RegionMaps.
  DenseMap<BasicBlock *, ValueMapT> RegionMaps;

  /// Mapping to remember PHI nodes that still need incoming values.
  using PHINodePairTy = std::pair<PHINode *, PHINode *>;
  DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap;

  /// Repair the dominance tree after we created a copy block for @p BB.
  ///
  /// @returns The immediate dominator in the DT for @p BBCopy if in the region.
  BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy);

  /// Add the new operand from the copy of @p IncomingBB to @p PHICopy.
  ///
  /// PHI nodes, which may have (multiple) edges that enter from outside the
  /// non-affine subregion and even from outside the scop, are code generated as
  /// follows:
  ///
  /// # Original
  ///
  ///   Region: %A -> %exit
  ///   NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC)
  ///
  ///     pre:
  ///       %val = add i64 1, 1
  ///
  ///     A:
  ///       br label %nonaffB
  ///
  ///     nonaffB:
  ///       %phi = phi i64 [%val, %A], [%valC, %nonaffC], [%valD, %D]
  ///       %cmp = <nonaff>
  ///       br i1 %cmp, label %C, label %nonaffC
  ///
  ///     nonaffC:
  ///       %valC = add i64 1, 1
  ///       br i1 undef, label %D, label %nonaffB
  ///
  ///     D:
  ///       %valD = ...
  ///       %exit_cond = <loopexit>
  ///       br i1 %exit_cond, label %nonaffB, label %exit
  ///
  ///     exit:
  ///       ...
  ///
  ///  - %A and %D enter from outside the non-affine region.
  ///  - %nonaffC enters from within the non-affine region.
  ///
  ///  # New
  ///
  ///    polly.A:
  ///       store i64 %val, i64* %phi.phiops
  ///       br label %polly.nonaffB.entry
  ///
  ///    polly.nonaffB.entry:
  ///       %phi.phiops.reload = load i64, i64* %phi.phiops
  ///       br label %nonaffB
  ///
  ///    polly.nonaffB:
  ///       %polly.phi = phi i64 [%phi.phiops.reload, %polly.nonaffB.entry],
  ///                            [%p.valC, %polly.nonaffC]
  ///
  ///    polly.nonaffC:
  ///       %p.valC = add i64 1, 1
  ///       br i1 undef, label %polly.D, label %polly.nonaffB
  ///
  ///    polly.D:
  ///        %p.valD = ...
  ///        store i64 %p.valD, i64* %phi.phiops
  ///        %p.exit_cond = <loopexit>
  ///        br i1 %p.exit_cond, label %polly.nonaffB, label %exit
  ///
  /// Values that enter the PHI from outside the non-affine region are stored
  /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and
  /// reloaded in %polly.nonaffB.entry, a basic block generated before the
  /// actual non-affine region.
  ///
  /// When generating the PHI node of the non-affine region in %polly.nonaffB,
  /// incoming edges from outside the region are combined into a single branch
  /// from %polly.nonaffB.entry which has as incoming value the value reloaded
  /// from the %phi.phiops stack slot. Incoming edges from within the region
  /// refer to the copied instructions (%p.valC) and basic blocks
  /// (%polly.nonaffC) of the non-affine region.
  ///
  /// @param Stmt       The statement to code generate.
  /// @param PHI        The original PHI we copy.
  /// @param PHICopy    The copy of @p PHI.
  /// @param IncomingBB An incoming block of @p PHI.
  /// @param LTS        A map from old loops to new induction variables as
  ///                   SCEVs.
  void addOperandToPHI(ScopStmt &Stmt, PHINode *PHI, PHINode *PHICopy,
                       BasicBlock *IncomingBB, LoopToScevMapT &LTS);

  /// Create a PHI that combines the incoming values from all incoming blocks
  /// that are in the subregion.
  ///
  /// PHIs in the subregion's exit block can have incoming edges from within and
  /// outside the subregion. This function combines the incoming values from
  /// within the subregion to appear as if there is only one incoming edge from
  /// the subregion (an additional exit block is created by RegionGenerator).
  /// This is to avoid that a value is written to the .phiops location without
  /// leaving the subregion, because the exiting block has an edge back into the
  /// subregion.
  ///
  /// @param MA    The WRITE of MemoryKind::PHI/MemoryKind::ExitPHI for a PHI in
  ///              the subregion's exit block.
  /// @param LTS   Virtual induction variable mapping.
  /// @param BBMap A mapping from old values to their new values in this block.
  /// @param L     Loop surrounding this region statement.
  ///
  /// @returns The constructed PHI node.
  PHINode *buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap,
                        Loop *L);

  /// Return the new value of a scalar write, creating a PHINode if necessary.
  ///
  /// @param MA    A scalar WRITE MemoryAccess.
  /// @param LTS   Virtual induction variable mapping.
  /// @param BBMap A mapping from old values to their new values in this block.
  ///
  /// @returns The effective value of @p MA's written value when leaving the
  ///          subregion.
  /// @see buildExitPHI
  Value *getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap);

  /// Generate the scalar stores for the given statement.
  ///
  /// After the statement @p Stmt was copied, all inner-SCoP scalar dependences
  /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
  /// be demoted to memory.
  ///
  /// @param Stmt  The statement we generate code for.
  /// @param LTS   A mapping from loops' virtual canonical induction variables
  ///              to their new values (for values recalculated in the new ScoP,
  ///              but not within this basic block).
  /// @param BBMap A mapping from old values to their new values in this block.
  /// @param NewAccesses A map from memory access ids to new ast expressions.
  virtual void
  generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
                       __isl_keep isl_id_to_ast_expr *NewAccesses) override;

  /// Copy a single PHI instruction.
  ///
  /// This copies a single PHI instruction and updates references to old values
  /// with references to new values, as defined by GlobalMap and BBMap.
  ///
  /// @param Stmt      The statement to code generate.
  /// @param PHI       The PHI instruction to copy.
  /// @param BBMap     A mapping from old values to their new values
  ///                  (for values recalculated within this basic block).
  /// @param LTS       A map from old loops to new induction variables as SCEVs.
  virtual void copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
                                  ValueMapT &BBMap,
                                  LoopToScevMapT &LTS) override;
};
} // namespace polly
#endif