1 //===------ DeLICM.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Undo the effect of Loop Invariant Code Motion (LICM) and
10 // GVN Partial Redundancy Elimination (PRE) on SCoP-level.
11 //
12 // Namely, remove register/scalar dependencies by mapping them back to array
13 // elements.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "polly/DeLICM.h"
18 #include "polly/LinkAllPasses.h"
19 #include "polly/Options.h"
20 #include "polly/ScopInfo.h"
21 #include "polly/ScopPass.h"
22 #include "polly/Support/GICHelper.h"
23 #include "polly/Support/ISLOStream.h"
24 #include "polly/Support/ISLTools.h"
25 #include "polly/ZoneAlgo.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/InitializePasses.h"
28 
29 #define DEBUG_TYPE "polly-delicm"
30 
31 using namespace polly;
32 using namespace llvm;
33 
34 namespace {
35 
36 cl::opt<int>
37     DelicmMaxOps("polly-delicm-max-ops",
38                  cl::desc("Maximum number of isl operations to invest for "
39                           "lifetime analysis; 0=no limit"),
40                  cl::init(1000000), cl::cat(PollyCategory));
41 
42 cl::opt<bool> DelicmOverapproximateWrites(
43     "polly-delicm-overapproximate-writes",
44     cl::desc(
45         "Do more PHI writes than necessary in order to avoid partial accesses"),
46     cl::init(false), cl::Hidden, cl::cat(PollyCategory));
47 
48 cl::opt<bool> DelicmPartialWrites("polly-delicm-partial-writes",
49                                   cl::desc("Allow partial writes"),
50                                   cl::init(true), cl::Hidden,
51                                   cl::cat(PollyCategory));
52 
53 cl::opt<bool>
54     DelicmComputeKnown("polly-delicm-compute-known",
55                        cl::desc("Compute known content of array elements"),
56                        cl::init(true), cl::Hidden, cl::cat(PollyCategory));
57 
58 STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs");
59 STATISTIC(DeLICMOutOfQuota,
60           "Analyses aborted because max_operations was reached");
61 STATISTIC(MappedValueScalars, "Number of mapped Value scalars");
62 STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars");
63 STATISTIC(TargetsMapped, "Number of stores used for at least one mapping");
64 STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized");
65 
66 STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM");
67 STATISTIC(NumValueWritesInLoops,
68           "Number of scalar value writes nested in affine loops after DeLICM");
69 STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM");
70 STATISTIC(NumPHIWritesInLoops,
71           "Number of scalar phi writes nested in affine loops after DeLICM");
72 STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM");
73 STATISTIC(NumSingletonWritesInLoops,
74           "Number of singleton writes nested in affine loops after DeLICM");
75 
computeReachingOverwrite(isl::union_map Schedule,isl::union_map Writes,bool InclPrevWrite,bool InclOverwrite)76 isl::union_map computeReachingOverwrite(isl::union_map Schedule,
77                                         isl::union_map Writes,
78                                         bool InclPrevWrite,
79                                         bool InclOverwrite) {
80   return computeReachingWrite(Schedule, Writes, true, InclPrevWrite,
81                               InclOverwrite);
82 }
83 
84 /// Compute the next overwrite for a scalar.
85 ///
86 /// @param Schedule      { DomainWrite[] -> Scatter[] }
87 ///                      Schedule of (at least) all writes. Instances not in @p
88 ///                      Writes are ignored.
89 /// @param Writes        { DomainWrite[] }
90 ///                      The element instances that write to the scalar.
91 /// @param InclPrevWrite Whether to extend the timepoints to include
92 ///                      the timepoint where the previous write happens.
93 /// @param InclOverwrite Whether the reaching overwrite includes the timepoint
94 ///                      of the overwrite itself.
95 ///
96 /// @return { Scatter[] -> DomainDef[] }
computeScalarReachingOverwrite(isl::union_map Schedule,isl::union_set Writes,bool InclPrevWrite,bool InclOverwrite)97 isl::union_map computeScalarReachingOverwrite(isl::union_map Schedule,
98                                               isl::union_set Writes,
99                                               bool InclPrevWrite,
100                                               bool InclOverwrite) {
101 
102   // { DomainWrite[] }
103   auto WritesMap = isl::union_map::from_domain(Writes);
104 
105   // { [Element[] -> Scatter[]] -> DomainWrite[] }
106   auto Result = computeReachingOverwrite(
107       std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite);
108 
109   return Result.domain_factor_range();
110 }
111 
112 /// Overload of computeScalarReachingOverwrite, with only one writing statement.
113 /// Consequently, the result consists of only one map space.
114 ///
115 /// @param Schedule      { DomainWrite[] -> Scatter[] }
116 /// @param Writes        { DomainWrite[] }
117 /// @param InclPrevWrite Include the previous write to result.
118 /// @param InclOverwrite Include the overwrite to the result.
119 ///
120 /// @return { Scatter[] -> DomainWrite[] }
computeScalarReachingOverwrite(isl::union_map Schedule,isl::set Writes,bool InclPrevWrite,bool InclOverwrite)121 isl::map computeScalarReachingOverwrite(isl::union_map Schedule,
122                                         isl::set Writes, bool InclPrevWrite,
123                                         bool InclOverwrite) {
124   isl::space ScatterSpace = getScatterSpace(Schedule);
125   isl::space DomSpace = Writes.get_space();
126 
127   isl::union_map ReachOverwrite = computeScalarReachingOverwrite(
128       Schedule, isl::union_set(Writes), InclPrevWrite, InclOverwrite);
129 
130   isl::space ResultSpace = ScatterSpace.map_from_domain_and_range(DomSpace);
131   return singleton(std::move(ReachOverwrite), ResultSpace);
132 }
133 
134 /// Try to find a 'natural' extension of a mapped to elements outside its
135 /// domain.
136 ///
137 /// @param Relevant The map with mapping that may not be modified.
138 /// @param Universe The domain to which @p Relevant needs to be extended.
139 ///
140 /// @return A map with that associates the domain elements of @p Relevant to the
141 ///         same elements and in addition the elements of @p Universe to some
142 ///         undefined elements. The function prefers to return simple maps.
expandMapping(isl::union_map Relevant,isl::union_set Universe)143 isl::union_map expandMapping(isl::union_map Relevant, isl::union_set Universe) {
144   Relevant = Relevant.coalesce();
145   isl::union_set RelevantDomain = Relevant.domain();
146   isl::union_map Simplified = Relevant.gist_domain(RelevantDomain);
147   Simplified = Simplified.coalesce();
148   return Simplified.intersect_domain(Universe);
149 }
150 
151 /// Represent the knowledge of the contents of any array elements in any zone or
152 /// the knowledge we would add when mapping a scalar to an array element.
153 ///
154 /// Every array element at every zone unit has one of two states:
155 ///
156 /// - Unused: Not occupied by any value so a transformation can change it to
157 ///   other values.
158 ///
159 /// - Occupied: The element contains a value that is still needed.
160 ///
161 /// The union of Unused and Unknown zones forms the universe, the set of all
162 /// elements at every timepoint. The universe can easily be derived from the
163 /// array elements that are accessed someway. Arrays that are never accessed
164 /// also never play a role in any computation and can hence be ignored. With a
165 /// given universe, only one of the sets needs to stored implicitly. Computing
166 /// the complement is also an expensive operation, hence this class has been
167 /// designed that only one of sets is needed while the other is assumed to be
168 /// implicit. It can still be given, but is mostly ignored.
169 ///
170 /// There are two use cases for the Knowledge class:
171 ///
172 /// 1) To represent the knowledge of the current state of ScopInfo. The unused
173 ///    state means that an element is currently unused: there is no read of it
174 ///    before the next overwrite. Also called 'Existing'.
175 ///
176 /// 2) To represent the requirements for mapping a scalar to array elements. The
177 ///    unused state means that there is no change/requirement. Also called
178 ///    'Proposed'.
179 ///
180 /// In addition to these states at unit zones, Knowledge needs to know when
181 /// values are written. This is because written values may have no lifetime (one
182 /// reason is that the value is never read). Such writes would therefore never
183 /// conflict, but overwrite values that might still be required. Another source
184 /// of problems are multiple writes to the same element at the same timepoint,
185 /// because their order is undefined.
186 class Knowledge {
187 private:
188   /// { [Element[] -> Zone[]] }
189   /// Set of array elements and when they are alive.
190   /// Can contain a nullptr; in this case the set is implicitly defined as the
191   /// complement of #Unused.
192   ///
193   /// The set of alive array elements is represented as zone, as the set of live
194   /// values can differ depending on how the elements are interpreted.
195   /// Assuming a value X is written at timestep [0] and read at timestep [1]
196   /// without being used at any later point, then the value is alive in the
197   /// interval ]0,1[. This interval cannot be represented by an integer set, as
198   /// it does not contain any integer point. Zones allow us to represent this
199   /// interval and can be converted to sets of timepoints when needed (e.g., in
200   /// isConflicting when comparing to the write sets).
201   /// @see convertZoneToTimepoints and this file's comment for more details.
202   isl::union_set Occupied;
203 
204   /// { [Element[] -> Zone[]] }
205   /// Set of array elements when they are not alive, i.e. their memory can be
206   /// used for other purposed. Can contain a nullptr; in this case the set is
207   /// implicitly defined as the complement of #Occupied.
208   isl::union_set Unused;
209 
210   /// { [Element[] -> Zone[]] -> ValInst[] }
211   /// Maps to the known content for each array element at any interval.
212   ///
213   /// Any element/interval can map to multiple known elements. This is due to
214   /// multiple llvm::Value referring to the same content. Examples are
215   ///
216   /// - A value stored and loaded again. The LoadInst represents the same value
217   /// as the StoreInst's value operand.
218   ///
219   /// - A PHINode is equal to any one of the incoming values. In case of
220   /// LCSSA-form, it is always equal to its single incoming value.
221   ///
222   /// Two Knowledges are considered not conflicting if at least one of the known
223   /// values match. Not known values are not stored as an unnamed tuple (as
224   /// #Written does), but maps to nothing.
225   ///
226   ///  Known values are usually just defined for #Occupied elements. Knowing
227   ///  #Unused contents has no advantage as it can be overwritten.
228   isl::union_map Known;
229 
230   /// { [Element[] -> Scatter[]] -> ValInst[] }
231   /// The write actions currently in the scop or that would be added when
232   /// mapping a scalar. Maps to the value that is written.
233   ///
234   /// Written values that cannot be identified are represented by an unknown
235   /// ValInst[] (an unnamed tuple of 0 dimension). It conflicts with itself.
236   isl::union_map Written;
237 
238   /// Check whether this Knowledge object is well-formed.
checkConsistency() const239   void checkConsistency() const {
240 #ifndef NDEBUG
241     // Default-initialized object
242     if (Occupied.is_null() && Unused.is_null() && Known.is_null() &&
243         Written.is_null())
244       return;
245 
246     assert(!Occupied.is_null() || !Unused.is_null());
247     assert(!Known.is_null());
248     assert(!Written.is_null());
249 
250     // If not all fields are defined, we cannot derived the universe.
251     if (Occupied.is_null() || Unused.is_null())
252       return;
253 
254     assert(Occupied.is_disjoint(Unused));
255     auto Universe = Occupied.unite(Unused);
256 
257     assert(!Known.domain().is_subset(Universe).is_false());
258     assert(!Written.domain().is_subset(Universe).is_false());
259 #endif
260   }
261 
262 public:
263   /// Initialize a nullptr-Knowledge. This is only provided for convenience; do
264   /// not use such an object.
Knowledge()265   Knowledge() {}
266 
267   /// Create a new object with the given members.
Knowledge(isl::union_set Occupied,isl::union_set Unused,isl::union_map Known,isl::union_map Written)268   Knowledge(isl::union_set Occupied, isl::union_set Unused,
269             isl::union_map Known, isl::union_map Written)
270       : Occupied(std::move(Occupied)), Unused(std::move(Unused)),
271         Known(std::move(Known)), Written(std::move(Written)) {
272     checkConsistency();
273   }
274 
275   /// Return whether this object was not default-constructed.
isUsable() const276   bool isUsable() const {
277     return (Occupied.is_null() || Unused.is_null()) && !Known.is_null() &&
278            !Written.is_null();
279   }
280 
281   /// Print the content of this object to @p OS.
print(llvm::raw_ostream & OS,unsigned Indent=0) const282   void print(llvm::raw_ostream &OS, unsigned Indent = 0) const {
283     if (isUsable()) {
284       if (!Occupied.is_null())
285         OS.indent(Indent) << "Occupied: " << Occupied << "\n";
286       else
287         OS.indent(Indent) << "Occupied: <Everything else not in Unused>\n";
288       if (!Unused.is_null())
289         OS.indent(Indent) << "Unused:   " << Unused << "\n";
290       else
291         OS.indent(Indent) << "Unused:   <Everything else not in Occupied>\n";
292       OS.indent(Indent) << "Known:    " << Known << "\n";
293       OS.indent(Indent) << "Written : " << Written << '\n';
294     } else {
295       OS.indent(Indent) << "Invalid knowledge\n";
296     }
297   }
298 
299   /// Combine two knowledges, this and @p That.
learnFrom(Knowledge That)300   void learnFrom(Knowledge That) {
301     assert(!isConflicting(*this, That));
302     assert(!Unused.is_null() && !That.Occupied.is_null());
303     assert(
304         That.Unused.is_null() &&
305         "This function is only prepared to learn occupied elements from That");
306     assert(Occupied.is_null() && "This function does not implement "
307                                  "`this->Occupied = "
308                                  "this->Occupied.unite(That.Occupied);`");
309 
310     Unused = Unused.subtract(That.Occupied);
311     Known = Known.unite(That.Known);
312     Written = Written.unite(That.Written);
313 
314     checkConsistency();
315   }
316 
317   /// Determine whether two Knowledges conflict with each other.
318   ///
319   /// In theory @p Existing and @p Proposed are symmetric, but the
320   /// implementation is constrained by the implicit interpretation. That is, @p
321   /// Existing must have #Unused defined (use case 1) and @p Proposed must have
322   /// #Occupied defined (use case 1).
323   ///
324   /// A conflict is defined as non-preserved semantics when they are merged. For
325   /// instance, when for the same array and zone they assume different
326   /// llvm::Values.
327   ///
328   /// @param Existing One of the knowledges with #Unused defined.
329   /// @param Proposed One of the knowledges with #Occupied defined.
330   /// @param OS       Dump the conflict reason to this output stream; use
331   ///                 nullptr to not output anything.
332   /// @param Indent   Indention for the conflict reason.
333   ///
334   /// @return True, iff the two knowledges are conflicting.
isConflicting(const Knowledge & Existing,const Knowledge & Proposed,llvm::raw_ostream * OS=nullptr,unsigned Indent=0)335   static bool isConflicting(const Knowledge &Existing,
336                             const Knowledge &Proposed,
337                             llvm::raw_ostream *OS = nullptr,
338                             unsigned Indent = 0) {
339     assert(!Existing.Unused.is_null());
340     assert(!Proposed.Occupied.is_null());
341 
342 #ifndef NDEBUG
343     if (!Existing.Occupied.is_null() && !Proposed.Unused.is_null()) {
344       auto ExistingUniverse = Existing.Occupied.unite(Existing.Unused);
345       auto ProposedUniverse = Proposed.Occupied.unite(Proposed.Unused);
346       assert(ExistingUniverse.is_equal(ProposedUniverse) &&
347              "Both inputs' Knowledges must be over the same universe");
348     }
349 #endif
350 
351     // Do the Existing and Proposed lifetimes conflict?
352     //
353     // Lifetimes are described as the cross-product of array elements and zone
354     // intervals in which they are alive (the space { [Element[] -> Zone[]] }).
355     // In the following we call this "element/lifetime interval".
356     //
357     // In order to not conflict, one of the following conditions must apply for
358     // each element/lifetime interval:
359     //
360     // 1. If occupied in one of the knowledges, it is unused in the other.
361     //
362     //   - or -
363     //
364     // 2. Both contain the same value.
365     //
366     // Instead of partitioning the element/lifetime intervals into a part that
367     // both Knowledges occupy (which requires an expensive subtraction) and for
368     // these to check whether they are known to be the same value, we check only
369     // the second condition and ensure that it also applies when then first
370     // condition is true. This is done by adding a wildcard value to
371     // Proposed.Known and Existing.Unused such that they match as a common known
372     // value. We use the "unknown ValInst" for this purpose. Every
373     // Existing.Unused may match with an unknown Proposed.Occupied because these
374     // never are in conflict with each other.
375     auto ProposedOccupiedAnyVal = makeUnknownForDomain(Proposed.Occupied);
376     auto ProposedValues = Proposed.Known.unite(ProposedOccupiedAnyVal);
377 
378     auto ExistingUnusedAnyVal = makeUnknownForDomain(Existing.Unused);
379     auto ExistingValues = Existing.Known.unite(ExistingUnusedAnyVal);
380 
381     auto MatchingVals = ExistingValues.intersect(ProposedValues);
382     auto Matches = MatchingVals.domain();
383 
384     // Any Proposed.Occupied must either have a match between the known values
385     // of Existing and Occupied, or be in Existing.Unused. In the latter case,
386     // the previously added "AnyVal" will match each other.
387     if (!Proposed.Occupied.is_subset(Matches)) {
388       if (OS) {
389         auto Conflicting = Proposed.Occupied.subtract(Matches);
390         auto ExistingConflictingKnown =
391             Existing.Known.intersect_domain(Conflicting);
392         auto ProposedConflictingKnown =
393             Proposed.Known.intersect_domain(Conflicting);
394 
395         OS->indent(Indent) << "Proposed lifetime conflicting with Existing's\n";
396         OS->indent(Indent) << "Conflicting occupied: " << Conflicting << "\n";
397         if (!ExistingConflictingKnown.is_empty())
398           OS->indent(Indent)
399               << "Existing Known:       " << ExistingConflictingKnown << "\n";
400         if (!ProposedConflictingKnown.is_empty())
401           OS->indent(Indent)
402               << "Proposed Known:       " << ProposedConflictingKnown << "\n";
403       }
404       return true;
405     }
406 
407     // Do the writes in Existing conflict with occupied values in Proposed?
408     //
409     // In order to not conflict, it must either write to unused lifetime or
410     // write the same value. To check, we remove the writes that write into
411     // Proposed.Unused (they never conflict) and then see whether the written
412     // value is already in Proposed.Known. If there are multiple known values
413     // and a written value is known under different names, it is enough when one
414     // of the written values (assuming that they are the same value under
415     // different names, e.g. a PHINode and one of the incoming values) matches
416     // one of the known names.
417     //
418     // We convert here the set of lifetimes to actual timepoints. A lifetime is
419     // in conflict with a set of write timepoints, if either a live timepoint is
420     // clearly within the lifetime or if a write happens at the beginning of the
421     // lifetime (where it would conflict with the value that actually writes the
422     // value alive). There is no conflict at the end of a lifetime, as the alive
423     // value will always be read, before it is overwritten again. The last
424     // property holds in Polly for all scalar values and we expect all users of
425     // Knowledge to check this property also for accesses to MemoryKind::Array.
426     auto ProposedFixedDefs =
427         convertZoneToTimepoints(Proposed.Occupied, true, false);
428     auto ProposedFixedKnown =
429         convertZoneToTimepoints(Proposed.Known, isl::dim::in, true, false);
430 
431     auto ExistingConflictingWrites =
432         Existing.Written.intersect_domain(ProposedFixedDefs);
433     auto ExistingConflictingWritesDomain = ExistingConflictingWrites.domain();
434 
435     auto CommonWrittenVal =
436         ProposedFixedKnown.intersect(ExistingConflictingWrites);
437     auto CommonWrittenValDomain = CommonWrittenVal.domain();
438 
439     if (!ExistingConflictingWritesDomain.is_subset(CommonWrittenValDomain)) {
440       if (OS) {
441         auto ExistingConflictingWritten =
442             ExistingConflictingWrites.subtract_domain(CommonWrittenValDomain);
443         auto ProposedConflictingKnown = ProposedFixedKnown.subtract_domain(
444             ExistingConflictingWritten.domain());
445 
446         OS->indent(Indent)
447             << "Proposed a lifetime where there is an Existing write into it\n";
448         OS->indent(Indent) << "Existing conflicting writes: "
449                            << ExistingConflictingWritten << "\n";
450         if (!ProposedConflictingKnown.is_empty())
451           OS->indent(Indent)
452               << "Proposed conflicting known:  " << ProposedConflictingKnown
453               << "\n";
454       }
455       return true;
456     }
457 
458     // Do the writes in Proposed conflict with occupied values in Existing?
459     auto ExistingAvailableDefs =
460         convertZoneToTimepoints(Existing.Unused, true, false);
461     auto ExistingKnownDefs =
462         convertZoneToTimepoints(Existing.Known, isl::dim::in, true, false);
463 
464     auto ProposedWrittenDomain = Proposed.Written.domain();
465     auto KnownIdentical = ExistingKnownDefs.intersect(Proposed.Written);
466     auto IdenticalOrUnused =
467         ExistingAvailableDefs.unite(KnownIdentical.domain());
468     if (!ProposedWrittenDomain.is_subset(IdenticalOrUnused)) {
469       if (OS) {
470         auto Conflicting = ProposedWrittenDomain.subtract(IdenticalOrUnused);
471         auto ExistingConflictingKnown =
472             ExistingKnownDefs.intersect_domain(Conflicting);
473         auto ProposedConflictingWritten =
474             Proposed.Written.intersect_domain(Conflicting);
475 
476         OS->indent(Indent) << "Proposed writes into range used by Existing\n";
477         OS->indent(Indent) << "Proposed conflicting writes: "
478                            << ProposedConflictingWritten << "\n";
479         if (!ExistingConflictingKnown.is_empty())
480           OS->indent(Indent)
481               << "Existing conflicting known: " << ExistingConflictingKnown
482               << "\n";
483       }
484       return true;
485     }
486 
487     // Does Proposed write at the same time as Existing already does (order of
488     // writes is undefined)? Writing the same value is permitted.
489     auto ExistingWrittenDomain = Existing.Written.domain();
490     auto BothWritten =
491         Existing.Written.domain().intersect(Proposed.Written.domain());
492     auto ExistingKnownWritten = filterKnownValInst(Existing.Written);
493     auto ProposedKnownWritten = filterKnownValInst(Proposed.Written);
494     auto CommonWritten =
495         ExistingKnownWritten.intersect(ProposedKnownWritten).domain();
496 
497     if (!BothWritten.is_subset(CommonWritten)) {
498       if (OS) {
499         auto Conflicting = BothWritten.subtract(CommonWritten);
500         auto ExistingConflictingWritten =
501             Existing.Written.intersect_domain(Conflicting);
502         auto ProposedConflictingWritten =
503             Proposed.Written.intersect_domain(Conflicting);
504 
505         OS->indent(Indent) << "Proposed writes at the same time as an already "
506                               "Existing write\n";
507         OS->indent(Indent) << "Conflicting writes: " << Conflicting << "\n";
508         if (!ExistingConflictingWritten.is_empty())
509           OS->indent(Indent)
510               << "Exiting write:      " << ExistingConflictingWritten << "\n";
511         if (!ProposedConflictingWritten.is_empty())
512           OS->indent(Indent)
513               << "Proposed write:     " << ProposedConflictingWritten << "\n";
514       }
515       return true;
516     }
517 
518     return false;
519   }
520 };
521 
522 /// Implementation of the DeLICM/DePRE transformation.
523 class DeLICMImpl : public ZoneAlgorithm {
524 private:
525   /// Knowledge before any transformation took place.
526   Knowledge OriginalZone;
527 
528   /// Current knowledge of the SCoP including all already applied
529   /// transformations.
530   Knowledge Zone;
531 
532   /// Number of StoreInsts something can be mapped to.
533   int NumberOfCompatibleTargets = 0;
534 
535   /// The number of StoreInsts to which at least one value or PHI has been
536   /// mapped to.
537   int NumberOfTargetsMapped = 0;
538 
539   /// The number of llvm::Value mapped to some array element.
540   int NumberOfMappedValueScalars = 0;
541 
542   /// The number of PHIs mapped to some array element.
543   int NumberOfMappedPHIScalars = 0;
544 
545   /// Determine whether two knowledges are conflicting with each other.
546   ///
547   /// @see Knowledge::isConflicting
isConflicting(const Knowledge & Proposed)548   bool isConflicting(const Knowledge &Proposed) {
549     raw_ostream *OS = nullptr;
550     LLVM_DEBUG(OS = &llvm::dbgs());
551     return Knowledge::isConflicting(Zone, Proposed, OS, 4);
552   }
553 
554   /// Determine whether @p SAI is a scalar that can be mapped to an array
555   /// element.
isMappable(const ScopArrayInfo * SAI)556   bool isMappable(const ScopArrayInfo *SAI) {
557     assert(SAI);
558 
559     if (SAI->isValueKind()) {
560       auto *MA = S->getValueDef(SAI);
561       if (!MA) {
562         LLVM_DEBUG(
563             dbgs()
564             << "    Reject because value is read-only within the scop\n");
565         return false;
566       }
567 
568       // Mapping if value is used after scop is not supported. The code
569       // generator would need to reload the scalar after the scop, but it
570       // does not have the information to where it is mapped to. Only the
571       // MemoryAccesses have that information, not the ScopArrayInfo.
572       auto Inst = MA->getAccessInstruction();
573       for (auto User : Inst->users()) {
574         if (!isa<Instruction>(User))
575           return false;
576         auto UserInst = cast<Instruction>(User);
577 
578         if (!S->contains(UserInst)) {
579           LLVM_DEBUG(dbgs() << "    Reject because value is escaping\n");
580           return false;
581         }
582       }
583 
584       return true;
585     }
586 
587     if (SAI->isPHIKind()) {
588       auto *MA = S->getPHIRead(SAI);
589       assert(MA);
590 
591       // Mapping of an incoming block from before the SCoP is not supported by
592       // the code generator.
593       auto PHI = cast<PHINode>(MA->getAccessInstruction());
594       for (auto Incoming : PHI->blocks()) {
595         if (!S->contains(Incoming)) {
596           LLVM_DEBUG(dbgs()
597                      << "    Reject because at least one incoming block is "
598                         "not in the scop region\n");
599           return false;
600         }
601       }
602 
603       return true;
604     }
605 
606     LLVM_DEBUG(dbgs() << "    Reject ExitPHI or other non-value\n");
607     return false;
608   }
609 
610   /// Compute the uses of a MemoryKind::Value and its lifetime (from its
611   /// definition to the last use).
612   ///
613   /// @param SAI The ScopArrayInfo representing the value's storage.
614   ///
615   /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] }
616   ///         First element is the set of uses for each definition.
617   ///         The second is the lifetime of each definition.
618   std::tuple<isl::union_map, isl::map>
computeValueUses(const ScopArrayInfo * SAI)619   computeValueUses(const ScopArrayInfo *SAI) {
620     assert(SAI->isValueKind());
621 
622     // { DomainRead[] }
623     auto Reads = makeEmptyUnionSet();
624 
625     // Find all uses.
626     for (auto *MA : S->getValueUses(SAI))
627       Reads = Reads.unite(getDomainFor(MA));
628 
629     // { DomainRead[] -> Scatter[] }
630     auto ReadSchedule = getScatterFor(Reads);
631 
632     auto *DefMA = S->getValueDef(SAI);
633     assert(DefMA);
634 
635     // { DomainDef[] }
636     auto Writes = getDomainFor(DefMA);
637 
638     // { DomainDef[] -> Scatter[] }
639     auto WriteScatter = getScatterFor(Writes);
640 
641     // { Scatter[] -> DomainDef[] }
642     auto ReachDef = getScalarReachingDefinition(DefMA->getStatement());
643 
644     // { [DomainDef[] -> Scatter[]] -> DomainUse[] }
645     auto Uses = isl::union_map(ReachDef.reverse().range_map())
646                     .apply_range(ReadSchedule.reverse());
647 
648     // { DomainDef[] -> Scatter[] }
649     auto UseScatter =
650         singleton(Uses.domain().unwrap(),
651                   Writes.get_space().map_from_domain_and_range(ScatterSpace));
652 
653     // { DomainDef[] -> Zone[] }
654     auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true);
655 
656     // { DomainDef[] -> DomainRead[] }
657     auto DefUses = Uses.domain_factor_domain();
658 
659     return std::make_pair(DefUses, Lifetime);
660   }
661 
662   /// Try to map a MemoryKind::Value to a given array element.
663   ///
664   /// @param SAI       Representation of the scalar's memory to map.
665   /// @param TargetElt { Scatter[] -> Element[] }
666   ///                  Suggestion where to map a scalar to when at a timepoint.
667   ///
668   /// @return true if the scalar was successfully mapped.
tryMapValue(const ScopArrayInfo * SAI,isl::map TargetElt)669   bool tryMapValue(const ScopArrayInfo *SAI, isl::map TargetElt) {
670     assert(SAI->isValueKind());
671 
672     auto *DefMA = S->getValueDef(SAI);
673     assert(DefMA->isValueKind());
674     assert(DefMA->isMustWrite());
675     auto *V = DefMA->getAccessValue();
676     auto *DefInst = DefMA->getAccessInstruction();
677 
678     // Stop if the scalar has already been mapped.
679     if (!DefMA->getLatestScopArrayInfo()->isValueKind())
680       return false;
681 
682     // { DomainDef[] -> Scatter[] }
683     auto DefSched = getScatterFor(DefMA);
684 
685     // Where each write is mapped to, according to the suggestion.
686     // { DomainDef[] -> Element[] }
687     auto DefTarget = TargetElt.apply_domain(DefSched.reverse());
688     simplify(DefTarget);
689     LLVM_DEBUG(dbgs() << "    Def Mapping: " << DefTarget << '\n');
690 
691     auto OrigDomain = getDomainFor(DefMA);
692     auto MappedDomain = DefTarget.domain();
693     if (!OrigDomain.is_subset(MappedDomain)) {
694       LLVM_DEBUG(
695           dbgs()
696           << "    Reject because mapping does not encompass all instances\n");
697       return false;
698     }
699 
700     // { DomainDef[] -> Zone[] }
701     isl::map Lifetime;
702 
703     // { DomainDef[] -> DomainUse[] }
704     isl::union_map DefUses;
705 
706     std::tie(DefUses, Lifetime) = computeValueUses(SAI);
707     LLVM_DEBUG(dbgs() << "    Lifetime: " << Lifetime << '\n');
708 
709     /// { [Element[] -> Zone[]] }
710     auto EltZone = Lifetime.apply_domain(DefTarget).wrap();
711     simplify(EltZone);
712 
713     // When known knowledge is disabled, just return the unknown value. It will
714     // either get filtered out or conflict with itself.
715     // { DomainDef[] -> ValInst[] }
716     isl::map ValInst;
717     if (DelicmComputeKnown)
718       ValInst = makeValInst(V, DefMA->getStatement(),
719                             LI->getLoopFor(DefInst->getParent()));
720     else
721       ValInst = makeUnknownForDomain(DefMA->getStatement());
722 
723     // { DomainDef[] -> [Element[] -> Zone[]] }
724     auto EltKnownTranslator = DefTarget.range_product(Lifetime);
725 
726     // { [Element[] -> Zone[]] -> ValInst[] }
727     auto EltKnown = ValInst.apply_domain(EltKnownTranslator);
728     simplify(EltKnown);
729 
730     // { DomainDef[] -> [Element[] -> Scatter[]] }
731     auto WrittenTranslator = DefTarget.range_product(DefSched);
732 
733     // { [Element[] -> Scatter[]] -> ValInst[] }
734     auto DefEltSched = ValInst.apply_domain(WrittenTranslator);
735     simplify(DefEltSched);
736 
737     Knowledge Proposed(EltZone, {}, filterKnownValInst(EltKnown), DefEltSched);
738     if (isConflicting(Proposed))
739       return false;
740 
741     // { DomainUse[] -> Element[] }
742     auto UseTarget = DefUses.reverse().apply_range(DefTarget);
743 
744     mapValue(SAI, std::move(DefTarget), std::move(UseTarget),
745              std::move(Lifetime), std::move(Proposed));
746     return true;
747   }
748 
749   /// After a scalar has been mapped, update the global knowledge.
applyLifetime(Knowledge Proposed)750   void applyLifetime(Knowledge Proposed) {
751     Zone.learnFrom(std::move(Proposed));
752   }
753 
754   /// Map a MemoryKind::Value scalar to an array element.
755   ///
756   /// Callers must have ensured that the mapping is valid and not conflicting.
757   ///
758   /// @param SAI       The ScopArrayInfo representing the scalar's memory to
759   ///                  map.
760   /// @param DefTarget { DomainDef[] -> Element[] }
761   ///                  The array element to map the scalar to.
762   /// @param UseTarget { DomainUse[] -> Element[] }
763   ///                  The array elements the uses are mapped to.
764   /// @param Lifetime  { DomainDef[] -> Zone[] }
765   ///                  The lifetime of each llvm::Value definition for
766   ///                  reporting.
767   /// @param Proposed  Mapping constraints for reporting.
mapValue(const ScopArrayInfo * SAI,isl::map DefTarget,isl::union_map UseTarget,isl::map Lifetime,Knowledge Proposed)768   void mapValue(const ScopArrayInfo *SAI, isl::map DefTarget,
769                 isl::union_map UseTarget, isl::map Lifetime,
770                 Knowledge Proposed) {
771     // Redirect the read accesses.
772     for (auto *MA : S->getValueUses(SAI)) {
773       // { DomainUse[] }
774       auto Domain = getDomainFor(MA);
775 
776       // { DomainUse[] -> Element[] }
777       auto NewAccRel = UseTarget.intersect_domain(Domain);
778       simplify(NewAccRel);
779 
780       assert(isl_union_map_n_map(NewAccRel.get()) == 1);
781       MA->setNewAccessRelation(isl::map::from_union_map(NewAccRel));
782     }
783 
784     auto *WA = S->getValueDef(SAI);
785     WA->setNewAccessRelation(DefTarget);
786     applyLifetime(Proposed);
787 
788     MappedValueScalars++;
789     NumberOfMappedValueScalars += 1;
790   }
791 
makeValInst(Value * Val,ScopStmt * UserStmt,Loop * Scope,bool IsCertain=true)792   isl::map makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope,
793                        bool IsCertain = true) {
794     // When known knowledge is disabled, just return the unknown value. It will
795     // either get filtered out or conflict with itself.
796     if (!DelicmComputeKnown)
797       return makeUnknownForDomain(UserStmt);
798     return ZoneAlgorithm::makeValInst(Val, UserStmt, Scope, IsCertain);
799   }
800 
801   /// Express the incoming values of a PHI for each incoming statement in an
802   /// isl::union_map.
803   ///
804   /// @param SAI The PHI scalar represented by a ScopArrayInfo.
805   ///
806   /// @return { PHIWriteDomain[] -> ValInst[] }
determinePHIWrittenValues(const ScopArrayInfo * SAI)807   isl::union_map determinePHIWrittenValues(const ScopArrayInfo *SAI) {
808     auto Result = makeEmptyUnionMap();
809 
810     // Collect the incoming values.
811     for (auto *MA : S->getPHIIncomings(SAI)) {
812       // { DomainWrite[] -> ValInst[] }
813       isl::union_map ValInst;
814       auto *WriteStmt = MA->getStatement();
815 
816       auto Incoming = MA->getIncoming();
817       assert(!Incoming.empty());
818       if (Incoming.size() == 1) {
819         ValInst = makeValInst(Incoming[0].second, WriteStmt,
820                               LI->getLoopFor(Incoming[0].first));
821       } else {
822         // If the PHI is in a subregion's exit node it can have multiple
823         // incoming values (+ maybe another incoming edge from an unrelated
824         // block). We cannot directly represent it as a single llvm::Value.
825         // We currently model it as unknown value, but modeling as the PHIInst
826         // itself could be OK, too.
827         ValInst = makeUnknownForDomain(WriteStmt);
828       }
829 
830       Result = Result.unite(ValInst);
831     }
832 
833     assert(Result.is_single_valued() &&
834            "Cannot have multiple incoming values for same incoming statement");
835     return Result;
836   }
837 
838   /// Try to map a MemoryKind::PHI scalar to a given array element.
839   ///
840   /// @param SAI       Representation of the scalar's memory to map.
841   /// @param TargetElt { Scatter[] -> Element[] }
842   ///                  Suggestion where to map the scalar to when at a
843   ///                  timepoint.
844   ///
845   /// @return true if the PHI scalar has been mapped.
tryMapPHI(const ScopArrayInfo * SAI,isl::map TargetElt)846   bool tryMapPHI(const ScopArrayInfo *SAI, isl::map TargetElt) {
847     auto *PHIRead = S->getPHIRead(SAI);
848     assert(PHIRead->isPHIKind());
849     assert(PHIRead->isRead());
850 
851     // Skip if already been mapped.
852     if (!PHIRead->getLatestScopArrayInfo()->isPHIKind())
853       return false;
854 
855     // { DomainRead[] -> Scatter[] }
856     auto PHISched = getScatterFor(PHIRead);
857 
858     // { DomainRead[] -> Element[] }
859     auto PHITarget = PHISched.apply_range(TargetElt);
860     simplify(PHITarget);
861     LLVM_DEBUG(dbgs() << "    Mapping: " << PHITarget << '\n');
862 
863     auto OrigDomain = getDomainFor(PHIRead);
864     auto MappedDomain = PHITarget.domain();
865     if (!OrigDomain.is_subset(MappedDomain)) {
866       LLVM_DEBUG(
867           dbgs()
868           << "    Reject because mapping does not encompass all instances\n");
869       return false;
870     }
871 
872     // { DomainRead[] -> DomainWrite[] }
873     auto PerPHIWrites = computePerPHI(SAI);
874     if (PerPHIWrites.is_null()) {
875       LLVM_DEBUG(
876           dbgs() << "    Reject because cannot determine incoming values\n");
877       return false;
878     }
879 
880     // { DomainWrite[] -> Element[] }
881     auto WritesTarget = PerPHIWrites.apply_domain(PHITarget).reverse();
882     simplify(WritesTarget);
883 
884     // { DomainWrite[] }
885     auto UniverseWritesDom = isl::union_set::empty(ParamSpace.ctx());
886 
887     for (auto *MA : S->getPHIIncomings(SAI))
888       UniverseWritesDom = UniverseWritesDom.unite(getDomainFor(MA));
889 
890     auto RelevantWritesTarget = WritesTarget;
891     if (DelicmOverapproximateWrites)
892       WritesTarget = expandMapping(WritesTarget, UniverseWritesDom);
893 
894     auto ExpandedWritesDom = WritesTarget.domain();
895     if (!DelicmPartialWrites &&
896         !UniverseWritesDom.is_subset(ExpandedWritesDom)) {
897       LLVM_DEBUG(
898           dbgs() << "    Reject because did not find PHI write mapping for "
899                     "all instances\n");
900       if (DelicmOverapproximateWrites)
901         LLVM_DEBUG(dbgs() << "      Relevant Mapping:    "
902                           << RelevantWritesTarget << '\n');
903       LLVM_DEBUG(dbgs() << "      Deduced Mapping:     " << WritesTarget
904                         << '\n');
905       LLVM_DEBUG(dbgs() << "      Missing instances:    "
906                         << UniverseWritesDom.subtract(ExpandedWritesDom)
907                         << '\n');
908       return false;
909     }
910 
911     //  { DomainRead[] -> Scatter[] }
912     isl::union_map PerPHIWriteScatterUmap = PerPHIWrites.apply_range(Schedule);
913     isl::map PerPHIWriteScatter =
914         singleton(PerPHIWriteScatterUmap, PHISched.get_space());
915 
916     // { DomainRead[] -> Zone[] }
917     auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true);
918     simplify(Lifetime);
919     LLVM_DEBUG(dbgs() << "    Lifetime: " << Lifetime << "\n");
920 
921     // { DomainWrite[] -> Zone[] }
922     auto WriteLifetime = isl::union_map(Lifetime).apply_domain(PerPHIWrites);
923 
924     // { DomainWrite[] -> ValInst[] }
925     auto WrittenValue = determinePHIWrittenValues(SAI);
926 
927     // { DomainWrite[] -> [Element[] -> Scatter[]] }
928     auto WrittenTranslator = WritesTarget.range_product(Schedule);
929 
930     // { [Element[] -> Scatter[]] -> ValInst[] }
931     auto Written = WrittenValue.apply_domain(WrittenTranslator);
932     simplify(Written);
933 
934     // { DomainWrite[] -> [Element[] -> Zone[]] }
935     auto LifetimeTranslator = WritesTarget.range_product(WriteLifetime);
936 
937     // { DomainWrite[] -> ValInst[] }
938     auto WrittenKnownValue = filterKnownValInst(WrittenValue);
939 
940     // { [Element[] -> Zone[]] -> ValInst[] }
941     auto EltLifetimeInst = WrittenKnownValue.apply_domain(LifetimeTranslator);
942     simplify(EltLifetimeInst);
943 
944     // { [Element[] -> Zone[] }
945     auto Occupied = LifetimeTranslator.range();
946     simplify(Occupied);
947 
948     Knowledge Proposed(Occupied, {}, EltLifetimeInst, Written);
949     if (isConflicting(Proposed))
950       return false;
951 
952     mapPHI(SAI, std::move(PHITarget), std::move(WritesTarget),
953            std::move(Lifetime), std::move(Proposed));
954     return true;
955   }
956 
957   /// Map a MemoryKind::PHI scalar to an array element.
958   ///
959   /// Callers must have ensured that the mapping is valid and not conflicting
960   /// with the common knowledge.
961   ///
962   /// @param SAI         The ScopArrayInfo representing the scalar's memory to
963   ///                    map.
964   /// @param ReadTarget  { DomainRead[] -> Element[] }
965   ///                    The array element to map the scalar to.
966   /// @param WriteTarget { DomainWrite[] -> Element[] }
967   ///                    New access target for each PHI incoming write.
968   /// @param Lifetime    { DomainRead[] -> Zone[] }
969   ///                    The lifetime of each PHI for reporting.
970   /// @param Proposed    Mapping constraints for reporting.
mapPHI(const ScopArrayInfo * SAI,isl::map ReadTarget,isl::union_map WriteTarget,isl::map Lifetime,Knowledge Proposed)971   void mapPHI(const ScopArrayInfo *SAI, isl::map ReadTarget,
972               isl::union_map WriteTarget, isl::map Lifetime,
973               Knowledge Proposed) {
974     // { Element[] }
975     isl::space ElementSpace = ReadTarget.get_space().range();
976 
977     // Redirect the PHI incoming writes.
978     for (auto *MA : S->getPHIIncomings(SAI)) {
979       // { DomainWrite[] }
980       auto Domain = getDomainFor(MA);
981 
982       // { DomainWrite[] -> Element[] }
983       auto NewAccRel = WriteTarget.intersect_domain(Domain);
984       simplify(NewAccRel);
985 
986       isl::space NewAccRelSpace =
987           Domain.get_space().map_from_domain_and_range(ElementSpace);
988       isl::map NewAccRelMap = singleton(NewAccRel, NewAccRelSpace);
989       MA->setNewAccessRelation(NewAccRelMap);
990     }
991 
992     // Redirect the PHI read.
993     auto *PHIRead = S->getPHIRead(SAI);
994     PHIRead->setNewAccessRelation(ReadTarget);
995     applyLifetime(Proposed);
996 
997     MappedPHIScalars++;
998     NumberOfMappedPHIScalars++;
999   }
1000 
1001   /// Search and map scalars to memory overwritten by @p TargetStoreMA.
1002   ///
1003   /// Start trying to map scalars that are used in the same statement as the
1004   /// store. For every successful mapping, try to also map scalars of the
1005   /// statements where those are written. Repeat, until no more mapping
1006   /// opportunity is found.
1007   ///
1008   /// There is currently no preference in which order scalars are tried.
1009   /// Ideally, we would direct it towards a load instruction of the same array
1010   /// element.
collapseScalarsToStore(MemoryAccess * TargetStoreMA)1011   bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) {
1012     assert(TargetStoreMA->isLatestArrayKind());
1013     assert(TargetStoreMA->isMustWrite());
1014 
1015     auto TargetStmt = TargetStoreMA->getStatement();
1016 
1017     // { DomTarget[] }
1018     auto TargetDom = getDomainFor(TargetStmt);
1019 
1020     // { DomTarget[] -> Element[] }
1021     auto TargetAccRel = getAccessRelationFor(TargetStoreMA);
1022 
1023     // { Zone[] -> DomTarget[] }
1024     // For each point in time, find the next target store instance.
1025     auto Target =
1026         computeScalarReachingOverwrite(Schedule, TargetDom, false, true);
1027 
1028     // { Zone[] -> Element[] }
1029     // Use the target store's write location as a suggestion to map scalars to.
1030     auto EltTarget = Target.apply_range(TargetAccRel);
1031     simplify(EltTarget);
1032     LLVM_DEBUG(dbgs() << "    Target mapping is " << EltTarget << '\n');
1033 
1034     // Stack of elements not yet processed.
1035     SmallVector<MemoryAccess *, 16> Worklist;
1036 
1037     // Set of scalars already tested.
1038     SmallPtrSet<const ScopArrayInfo *, 16> Closed;
1039 
1040     // Lambda to add all scalar reads to the work list.
1041     auto ProcessAllIncoming = [&](ScopStmt *Stmt) {
1042       for (auto *MA : *Stmt) {
1043         if (!MA->isLatestScalarKind())
1044           continue;
1045         if (!MA->isRead())
1046           continue;
1047 
1048         Worklist.push_back(MA);
1049       }
1050     };
1051 
1052     auto *WrittenVal = TargetStoreMA->getAccessInstruction()->getOperand(0);
1053     if (auto *WrittenValInputMA = TargetStmt->lookupInputAccessOf(WrittenVal))
1054       Worklist.push_back(WrittenValInputMA);
1055     else
1056       ProcessAllIncoming(TargetStmt);
1057 
1058     auto AnyMapped = false;
1059     auto &DL = S->getRegion().getEntry()->getModule()->getDataLayout();
1060     auto StoreSize =
1061         DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType());
1062 
1063     while (!Worklist.empty()) {
1064       auto *MA = Worklist.pop_back_val();
1065 
1066       auto *SAI = MA->getScopArrayInfo();
1067       if (Closed.count(SAI))
1068         continue;
1069       Closed.insert(SAI);
1070       LLVM_DEBUG(dbgs() << "\n    Trying to map " << MA << " (SAI: " << SAI
1071                         << ")\n");
1072 
1073       // Skip non-mappable scalars.
1074       if (!isMappable(SAI))
1075         continue;
1076 
1077       auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType());
1078       if (MASize > StoreSize) {
1079         LLVM_DEBUG(
1080             dbgs() << "    Reject because storage size is insufficient\n");
1081         continue;
1082       }
1083 
1084       // Try to map MemoryKind::Value scalars.
1085       if (SAI->isValueKind()) {
1086         if (!tryMapValue(SAI, EltTarget))
1087           continue;
1088 
1089         auto *DefAcc = S->getValueDef(SAI);
1090         ProcessAllIncoming(DefAcc->getStatement());
1091 
1092         AnyMapped = true;
1093         continue;
1094       }
1095 
1096       // Try to map MemoryKind::PHI scalars.
1097       if (SAI->isPHIKind()) {
1098         if (!tryMapPHI(SAI, EltTarget))
1099           continue;
1100         // Add inputs of all incoming statements to the worklist. Prefer the
1101         // input accesses of the incoming blocks.
1102         for (auto *PHIWrite : S->getPHIIncomings(SAI)) {
1103           auto *PHIWriteStmt = PHIWrite->getStatement();
1104           bool FoundAny = false;
1105           for (auto Incoming : PHIWrite->getIncoming()) {
1106             auto *IncomingInputMA =
1107                 PHIWriteStmt->lookupInputAccessOf(Incoming.second);
1108             if (!IncomingInputMA)
1109               continue;
1110 
1111             Worklist.push_back(IncomingInputMA);
1112             FoundAny = true;
1113           }
1114 
1115           if (!FoundAny)
1116             ProcessAllIncoming(PHIWrite->getStatement());
1117         }
1118 
1119         AnyMapped = true;
1120         continue;
1121       }
1122     }
1123 
1124     if (AnyMapped) {
1125       TargetsMapped++;
1126       NumberOfTargetsMapped++;
1127     }
1128     return AnyMapped;
1129   }
1130 
1131   /// Compute when an array element is unused.
1132   ///
1133   /// @return { [Element[] -> Zone[]] }
computeLifetime() const1134   isl::union_set computeLifetime() const {
1135     // { Element[] -> Zone[] }
1136     auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads,
1137                                           false, false, true);
1138 
1139     auto Result = ArrayUnused.wrap();
1140 
1141     simplify(Result);
1142     return Result;
1143   }
1144 
1145   /// Determine when an array element is written to, and which value instance is
1146   /// written.
1147   ///
1148   /// @return { [Element[] -> Scatter[]] -> ValInst[] }
computeWritten() const1149   isl::union_map computeWritten() const {
1150     // { [Element[] -> Scatter[]] -> ValInst[] }
1151     auto EltWritten = applyDomainRange(AllWriteValInst, Schedule);
1152 
1153     simplify(EltWritten);
1154     return EltWritten;
1155   }
1156 
1157   /// Determine whether an access touches at most one element.
1158   ///
1159   /// The accessed element could be a scalar or accessing an array with constant
1160   /// subscript, such that all instances access only that element.
1161   ///
1162   /// @param MA The access to test.
1163   ///
1164   /// @return True, if zero or one elements are accessed; False if at least two
1165   ///         different elements are accessed.
isScalarAccess(MemoryAccess * MA)1166   bool isScalarAccess(MemoryAccess *MA) {
1167     auto Map = getAccessRelationFor(MA);
1168     auto Set = Map.range();
1169     return Set.is_singleton();
1170   }
1171 
1172   /// Print mapping statistics to @p OS.
printStatistics(llvm::raw_ostream & OS,int Indent=0) const1173   void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const {
1174     OS.indent(Indent) << "Statistics {\n";
1175     OS.indent(Indent + 4) << "Compatible overwrites: "
1176                           << NumberOfCompatibleTargets << "\n";
1177     OS.indent(Indent + 4) << "Overwrites mapped to:  " << NumberOfTargetsMapped
1178                           << '\n';
1179     OS.indent(Indent + 4) << "Value scalars mapped:  "
1180                           << NumberOfMappedValueScalars << '\n';
1181     OS.indent(Indent + 4) << "PHI scalars mapped:    "
1182                           << NumberOfMappedPHIScalars << '\n';
1183     OS.indent(Indent) << "}\n";
1184   }
1185 
1186 public:
DeLICMImpl(Scop * S,LoopInfo * LI)1187   DeLICMImpl(Scop *S, LoopInfo *LI) : ZoneAlgorithm("polly-delicm", S, LI) {}
1188 
1189   /// Calculate the lifetime (definition to last use) of every array element.
1190   ///
1191   /// @return True if the computed lifetimes (#Zone) is usable.
computeZone()1192   bool computeZone() {
1193     // Check that nothing strange occurs.
1194     collectCompatibleElts();
1195 
1196     isl::union_set EltUnused;
1197     isl::union_map EltKnown, EltWritten;
1198 
1199     {
1200       IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps);
1201 
1202       computeCommon();
1203 
1204       EltUnused = computeLifetime();
1205       EltKnown = computeKnown(true, false);
1206       EltWritten = computeWritten();
1207     }
1208     DeLICMAnalyzed++;
1209 
1210     if (EltUnused.is_null() || EltKnown.is_null() || EltWritten.is_null()) {
1211       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota &&
1212              "The only reason that these things have not been computed should "
1213              "be if the max-operations limit hit");
1214       DeLICMOutOfQuota++;
1215       LLVM_DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n");
1216       DebugLoc Begin, End;
1217       getDebugLocations(getBBPairForRegion(&S->getRegion()), Begin, End);
1218       OptimizationRemarkAnalysis R(DEBUG_TYPE, "OutOfQuota", Begin,
1219                                    S->getEntry());
1220       R << "maximal number of operations exceeded during zone analysis";
1221       S->getFunction().getContext().diagnose(R);
1222       return false;
1223     }
1224 
1225     Zone = OriginalZone = Knowledge({}, EltUnused, EltKnown, EltWritten);
1226     LLVM_DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4));
1227 
1228     assert(Zone.isUsable() && OriginalZone.isUsable());
1229     return true;
1230   }
1231 
1232   /// Try to map as many scalars to unused array elements as possible.
1233   ///
1234   /// Multiple scalars might be mappable to intersecting unused array element
1235   /// zones, but we can only chose one. This is a greedy algorithm, therefore
1236   /// the first processed element claims it.
greedyCollapse()1237   void greedyCollapse() {
1238     bool Modified = false;
1239 
1240     for (auto &Stmt : *S) {
1241       for (auto *MA : Stmt) {
1242         if (!MA->isLatestArrayKind())
1243           continue;
1244         if (!MA->isWrite())
1245           continue;
1246 
1247         if (MA->isMayWrite()) {
1248           LLVM_DEBUG(dbgs() << "Access " << MA
1249                             << " pruned because it is a MAY_WRITE\n");
1250           OptimizationRemarkMissed R(DEBUG_TYPE, "TargetMayWrite",
1251                                      MA->getAccessInstruction());
1252           R << "Skipped possible mapping target because it is not an "
1253                "unconditional overwrite";
1254           S->getFunction().getContext().diagnose(R);
1255           continue;
1256         }
1257 
1258         if (Stmt.getNumIterators() == 0) {
1259           LLVM_DEBUG(dbgs() << "Access " << MA
1260                             << " pruned because it is not in a loop\n");
1261           OptimizationRemarkMissed R(DEBUG_TYPE, "WriteNotInLoop",
1262                                      MA->getAccessInstruction());
1263           R << "skipped possible mapping target because it is not in a loop";
1264           S->getFunction().getContext().diagnose(R);
1265           continue;
1266         }
1267 
1268         if (isScalarAccess(MA)) {
1269           LLVM_DEBUG(dbgs()
1270                      << "Access " << MA
1271                      << " pruned because it writes only a single element\n");
1272           OptimizationRemarkMissed R(DEBUG_TYPE, "ScalarWrite",
1273                                      MA->getAccessInstruction());
1274           R << "skipped possible mapping target because the memory location "
1275                "written to does not depend on its outer loop";
1276           S->getFunction().getContext().diagnose(R);
1277           continue;
1278         }
1279 
1280         if (!isa<StoreInst>(MA->getAccessInstruction())) {
1281           LLVM_DEBUG(dbgs() << "Access " << MA
1282                             << " pruned because it is not a StoreInst\n");
1283           OptimizationRemarkMissed R(DEBUG_TYPE, "NotAStore",
1284                                      MA->getAccessInstruction());
1285           R << "skipped possible mapping target because non-store instructions "
1286                "are not supported";
1287           S->getFunction().getContext().diagnose(R);
1288           continue;
1289         }
1290 
1291         // Check for more than one element acces per statement instance.
1292         // Currently we expect write accesses to be functional, eg. disallow
1293         //
1294         //   { Stmt[0] -> [i] : 0 <= i < 2 }
1295         //
1296         // This may occur when some accesses to the element write/read only
1297         // parts of the element, eg. a single byte. Polly then divides each
1298         // element into subelements of the smallest access length, normal access
1299         // then touch multiple of such subelements. It is very common when the
1300         // array is accesses with memset, memcpy or memmove which take i8*
1301         // arguments.
1302         isl::union_map AccRel = MA->getLatestAccessRelation();
1303         if (!AccRel.is_single_valued().is_true()) {
1304           LLVM_DEBUG(dbgs() << "Access " << MA
1305                             << " is incompatible because it writes multiple "
1306                                "elements per instance\n");
1307           OptimizationRemarkMissed R(DEBUG_TYPE, "NonFunctionalAccRel",
1308                                      MA->getAccessInstruction());
1309           R << "skipped possible mapping target because it writes more than "
1310                "one element";
1311           S->getFunction().getContext().diagnose(R);
1312           continue;
1313         }
1314 
1315         isl::union_set TouchedElts = AccRel.range();
1316         if (!TouchedElts.is_subset(CompatibleElts)) {
1317           LLVM_DEBUG(
1318               dbgs()
1319               << "Access " << MA
1320               << " is incompatible because it touches incompatible elements\n");
1321           OptimizationRemarkMissed R(DEBUG_TYPE, "IncompatibleElts",
1322                                      MA->getAccessInstruction());
1323           R << "skipped possible mapping target because a target location "
1324                "cannot be reliably analyzed";
1325           S->getFunction().getContext().diagnose(R);
1326           continue;
1327         }
1328 
1329         assert(isCompatibleAccess(MA));
1330         NumberOfCompatibleTargets++;
1331         LLVM_DEBUG(dbgs() << "Analyzing target access " << MA << "\n");
1332         if (collapseScalarsToStore(MA))
1333           Modified = true;
1334       }
1335     }
1336 
1337     if (Modified)
1338       DeLICMScopsModified++;
1339   }
1340 
1341   /// Dump the internal information about a performed DeLICM to @p OS.
print(llvm::raw_ostream & OS,int Indent=0)1342   void print(llvm::raw_ostream &OS, int Indent = 0) {
1343     if (!Zone.isUsable()) {
1344       OS.indent(Indent) << "Zone not computed\n";
1345       return;
1346     }
1347 
1348     printStatistics(OS, Indent);
1349     if (!isModified()) {
1350       OS.indent(Indent) << "No modification has been made\n";
1351       return;
1352     }
1353     printAccesses(OS, Indent);
1354   }
1355 
1356   /// Return whether at least one transformation been applied.
isModified() const1357   bool isModified() const { return NumberOfTargetsMapped > 0; }
1358 };
1359 
collapseToUnused(Scop & S,LoopInfo & LI)1360 static std::unique_ptr<DeLICMImpl> collapseToUnused(Scop &S, LoopInfo &LI) {
1361   std::unique_ptr<DeLICMImpl> Impl = std::make_unique<DeLICMImpl>(&S, &LI);
1362 
1363   if (!Impl->computeZone()) {
1364     LLVM_DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n");
1365     return Impl;
1366   }
1367 
1368   LLVM_DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n");
1369   Impl->greedyCollapse();
1370 
1371   LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
1372   LLVM_DEBUG(dbgs() << S);
1373 
1374   return Impl;
1375 }
1376 
runDeLICM(Scop & S,LoopInfo & LI)1377 static std::unique_ptr<DeLICMImpl> runDeLICM(Scop &S, LoopInfo &LI) {
1378   std::unique_ptr<DeLICMImpl> Impl = collapseToUnused(S, LI);
1379 
1380   Scop::ScopStatistics ScopStats = S.getStatistics();
1381   NumValueWrites += ScopStats.NumValueWrites;
1382   NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
1383   NumPHIWrites += ScopStats.NumPHIWrites;
1384   NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
1385   NumSingletonWrites += ScopStats.NumSingletonWrites;
1386   NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
1387 
1388   return Impl;
1389 }
1390 
runDeLICMUsingNPM(Scop & S,ScopAnalysisManager & SAM,ScopStandardAnalysisResults & SAR,SPMUpdater & U,raw_ostream * OS)1391 static PreservedAnalyses runDeLICMUsingNPM(Scop &S, ScopAnalysisManager &SAM,
1392                                            ScopStandardAnalysisResults &SAR,
1393                                            SPMUpdater &U, raw_ostream *OS) {
1394   LoopInfo &LI = SAR.LI;
1395   std::unique_ptr<DeLICMImpl> Impl = runDeLICM(S, LI);
1396 
1397   if (OS) {
1398     *OS << "Printing analysis 'Polly - DeLICM/DePRE' for region: '"
1399         << S.getName() << "' in function '" << S.getFunction().getName()
1400         << "':\n";
1401     if (Impl) {
1402       assert(Impl->getScop() == &S);
1403 
1404       *OS << "DeLICM result:\n";
1405       Impl->print(*OS);
1406     }
1407   }
1408 
1409   if (!Impl->isModified())
1410     return PreservedAnalyses::all();
1411 
1412   PreservedAnalyses PA;
1413   PA.preserveSet<AllAnalysesOn<Module>>();
1414   PA.preserveSet<AllAnalysesOn<Function>>();
1415   PA.preserveSet<AllAnalysesOn<Loop>>();
1416   return PA;
1417 }
1418 
1419 class DeLICMWrapperPass : public ScopPass {
1420 private:
1421   DeLICMWrapperPass(const DeLICMWrapperPass &) = delete;
1422   const DeLICMWrapperPass &operator=(const DeLICMWrapperPass &) = delete;
1423 
1424   /// The pass implementation, also holding per-scop data.
1425   std::unique_ptr<DeLICMImpl> Impl;
1426 
1427 public:
1428   static char ID;
DeLICMWrapperPass()1429   explicit DeLICMWrapperPass() : ScopPass(ID) {}
1430 
getAnalysisUsage(AnalysisUsage & AU) const1431   virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
1432     AU.addRequiredTransitive<ScopInfoRegionPass>();
1433     AU.addRequired<LoopInfoWrapperPass>();
1434     AU.setPreservesAll();
1435   }
1436 
runOnScop(Scop & S)1437   virtual bool runOnScop(Scop &S) override {
1438     // Free resources for previous scop's computation, if not yet done.
1439     releaseMemory();
1440 
1441     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1442     Impl = runDeLICM(S, LI);
1443 
1444     return Impl->isModified();
1445   }
1446 
printScop(raw_ostream & OS,Scop & S) const1447   virtual void printScop(raw_ostream &OS, Scop &S) const override {
1448     if (!Impl)
1449       return;
1450     assert(Impl->getScop() == &S);
1451 
1452     OS << "DeLICM result:\n";
1453     Impl->print(OS);
1454   }
1455 
releaseMemory()1456   virtual void releaseMemory() override { Impl.reset(); }
1457 };
1458 
1459 char DeLICMWrapperPass::ID;
1460 } // anonymous namespace
1461 
createDeLICMWrapperPass()1462 Pass *polly::createDeLICMWrapperPass() { return new DeLICMWrapperPass(); }
1463 
1464 INITIALIZE_PASS_BEGIN(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE",
1465                       false, false)
INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass)1466 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass)
1467 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1468 INITIALIZE_PASS_END(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE",
1469                     false, false)
1470 
1471 llvm::PreservedAnalyses DeLICMPass::run(Scop &S, ScopAnalysisManager &SAM,
1472                                         ScopStandardAnalysisResults &SAR,
1473                                         SPMUpdater &U) {
1474   return runDeLICMUsingNPM(S, SAM, SAR, U, nullptr);
1475 }
1476 
run(Scop & S,ScopAnalysisManager & SAM,ScopStandardAnalysisResults & SAR,SPMUpdater & U)1477 llvm::PreservedAnalyses DeLICMPrinterPass::run(Scop &S,
1478                                                ScopAnalysisManager &SAM,
1479                                                ScopStandardAnalysisResults &SAR,
1480                                                SPMUpdater &U) {
1481   return runDeLICMUsingNPM(S, SAM, SAR, U, &OS);
1482 }
1483 
isConflicting(isl::union_set ExistingOccupied,isl::union_set ExistingUnused,isl::union_map ExistingKnown,isl::union_map ExistingWrites,isl::union_set ProposedOccupied,isl::union_set ProposedUnused,isl::union_map ProposedKnown,isl::union_map ProposedWrites,llvm::raw_ostream * OS,unsigned Indent)1484 bool polly::isConflicting(
1485     isl::union_set ExistingOccupied, isl::union_set ExistingUnused,
1486     isl::union_map ExistingKnown, isl::union_map ExistingWrites,
1487     isl::union_set ProposedOccupied, isl::union_set ProposedUnused,
1488     isl::union_map ProposedKnown, isl::union_map ProposedWrites,
1489     llvm::raw_ostream *OS, unsigned Indent) {
1490   Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused),
1491                      std::move(ExistingKnown), std::move(ExistingWrites));
1492   Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused),
1493                      std::move(ProposedKnown), std::move(ProposedWrites));
1494 
1495   return Knowledge::isConflicting(Existing, Proposed, OS, Indent);
1496 }
1497