1 //===- VectorAnalysis.cpp - Analysis for Vectorization --------------------===//
2 //
3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "mlir/Analysis/AffineAnalysis.h"
10 #include "mlir/Analysis/LoopAnalysis.h"
11 #include "mlir/Dialect/AffineOps/AffineOps.h"
12 #include "mlir/Dialect/StandardOps/Ops.h"
13 #include "mlir/Dialect/VectorOps/Utils.h"
14 #include "mlir/Dialect/VectorOps/VectorOps.h"
15 #include "mlir/IR/Builders.h"
16 #include "mlir/IR/IntegerSet.h"
17 #include "mlir/IR/Operation.h"
18 #include "mlir/Support/Functional.h"
19 #include "mlir/Support/STLExtras.h"
20 
21 #include "llvm/ADT/DenseSet.h"
22 #include "llvm/ADT/SetVector.h"
23 
24 ///
25 /// Implements Analysis functions specific to vectors which support
26 /// the vectorization and vectorization materialization passes.
27 ///
28 
29 using namespace mlir;
30 
31 using llvm::SetVector;
32 
shapeRatio(ArrayRef<int64_t> superShape,ArrayRef<int64_t> subShape)33 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(ArrayRef<int64_t> superShape,
34                                                    ArrayRef<int64_t> subShape) {
35   if (superShape.size() < subShape.size()) {
36     return Optional<SmallVector<int64_t, 4>>();
37   }
38 
39   // Starting from the end, compute the integer divisors.
40   // Set the boolean `divides` if integral division is not possible.
41   std::vector<int64_t> result;
42   result.reserve(superShape.size());
43   bool divides = true;
44   auto divide = [&divides, &result](int superSize, int subSize) {
45     assert(superSize > 0 && "superSize must be > 0");
46     assert(subSize > 0 && "subSize must be > 0");
47     divides &= (superSize % subSize == 0);
48     result.push_back(superSize / subSize);
49   };
50   functional::zipApply(
51       divide, SmallVector<int64_t, 8>{superShape.rbegin(), superShape.rend()},
52       SmallVector<int64_t, 8>{subShape.rbegin(), subShape.rend()});
53 
54   // If integral division does not occur, return and let the caller decide.
55   if (!divides) {
56     return None;
57   }
58 
59   // At this point we computed the ratio (in reverse) for the common
60   // size. Fill with the remaining entries from the super-vector shape (still in
61   // reverse).
62   int commonSize = subShape.size();
63   std::copy(superShape.rbegin() + commonSize, superShape.rend(),
64             std::back_inserter(result));
65 
66   assert(result.size() == superShape.size() &&
67          "super to sub shape ratio is not of the same size as the super rank");
68 
69   // Reverse again to get it back in the proper order and return.
70   return SmallVector<int64_t, 4>{result.rbegin(), result.rend()};
71 }
72 
shapeRatio(VectorType superVectorType,VectorType subVectorType)73 Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(VectorType superVectorType,
74                                                    VectorType subVectorType) {
75   assert(superVectorType.getElementType() == subVectorType.getElementType() &&
76          "vector types must be of the same elemental type");
77   return shapeRatio(superVectorType.getShape(), subVectorType.getShape());
78 }
79 
80 /// Constructs a permutation map from memref indices to vector dimension.
81 ///
82 /// The implementation uses the knowledge of the mapping of enclosing loop to
83 /// vector dimension. `enclosingLoopToVectorDim` carries this information as a
84 /// map with:
85 ///   - keys representing "vectorized enclosing loops";
86 ///   - values representing the corresponding vector dimension.
87 /// The algorithm traverses "vectorized enclosing loops" and extracts the
88 /// at-most-one MemRef index that is invariant along said loop. This index is
89 /// guaranteed to be at most one by construction: otherwise the MemRef is not
90 /// vectorizable.
91 /// If this invariant index is found, it is added to the permutation_map at the
92 /// proper vector dimension.
93 /// If no index is found to be invariant, 0 is added to the permutation_map and
94 /// corresponds to a vector broadcast along that dimension.
95 ///
96 /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
97 /// signalling that no permutation map can be constructed given
98 /// `enclosingLoopToVectorDim`.
99 ///
100 /// Examples can be found in the documentation of `makePermutationMap`, in the
101 /// header file.
makePermutationMap(ArrayRef<Value> indices,const DenseMap<Operation *,unsigned> & enclosingLoopToVectorDim)102 static AffineMap makePermutationMap(
103     ArrayRef<Value> indices,
104     const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
105   if (enclosingLoopToVectorDim.empty())
106     return AffineMap();
107   MLIRContext *context =
108       enclosingLoopToVectorDim.begin()->getFirst()->getContext();
109   using functional::makePtrDynCaster;
110   using functional::map;
111   SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(),
112                                   getAffineConstantExpr(0, context));
113 
114   for (auto kvp : enclosingLoopToVectorDim) {
115     assert(kvp.second < perm.size());
116     auto invariants = getInvariantAccesses(
117         cast<AffineForOp>(kvp.first).getInductionVar(), indices);
118     unsigned numIndices = indices.size();
119     unsigned countInvariantIndices = 0;
120     for (unsigned dim = 0; dim < numIndices; ++dim) {
121       if (!invariants.count(indices[dim])) {
122         assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
123                "permutationMap already has an entry along dim");
124         perm[kvp.second] = getAffineDimExpr(dim, context);
125       } else {
126         ++countInvariantIndices;
127       }
128     }
129     assert((countInvariantIndices == numIndices ||
130             countInvariantIndices == numIndices - 1) &&
131            "Vectorization prerequisite violated: at most 1 index may be "
132            "invariant wrt a vectorized loop");
133   }
134   return AffineMap::get(indices.size(), 0, perm);
135 }
136 
137 /// Implementation detail that walks up the parents and records the ones with
138 /// the specified type.
139 /// TODO(ntv): could also be implemented as a collect parents followed by a
140 /// filter and made available outside this file.
141 template <typename T>
getParentsOfType(Operation * op)142 static SetVector<Operation *> getParentsOfType(Operation *op) {
143   SetVector<Operation *> res;
144   auto *current = op;
145   while (auto *parent = current->getParentOp()) {
146     if (auto typedParent = dyn_cast<T>(parent)) {
147       assert(res.count(parent) == 0 && "Already inserted");
148       res.insert(parent);
149     }
150     current = parent;
151   }
152   return res;
153 }
154 
/// Returns the AffineForOp ancestors enclosing `op`, ordered from closest
/// (innermost) to farthest (outermost).
static SetVector<Operation *> getEnclosingforOps(Operation *op) {
  return getParentsOfType<AffineForOp>(op);
}
159 
makePermutationMap(Operation * op,ArrayRef<Value> indices,const DenseMap<Operation *,unsigned> & loopToVectorDim)160 AffineMap mlir::makePermutationMap(
161     Operation *op, ArrayRef<Value> indices,
162     const DenseMap<Operation *, unsigned> &loopToVectorDim) {
163   DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
164   auto enclosingLoops = getEnclosingforOps(op);
165   for (auto *forInst : enclosingLoops) {
166     auto it = loopToVectorDim.find(forInst);
167     if (it != loopToVectorDim.end()) {
168       enclosingLoopToVectorDim.insert(*it);
169     }
170   }
171   return ::makePermutationMap(indices, enclosingLoopToVectorDim);
172 }
173 
operatesOnSuperVectorsOf(Operation & op,VectorType subVectorType)174 bool mlir::matcher::operatesOnSuperVectorsOf(Operation &op,
175                                              VectorType subVectorType) {
176   // First, extract the vector type and distinguish between:
177   //   a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
178   //      vector.transfer_write); and
179   //   b. ops that *may* lower a super-vector (all other ops).
180   // The ops that *may* lower a super-vector only do so if the super-vector to
181   // sub-vector ratio exists. The ops that *must* lower a super-vector are
182   // explicitly checked for this property.
183   /// TODO(ntv): there should be a single function for all ops to do this so we
184   /// do not have to special case. Maybe a trait, or just a method, unclear atm.
185   bool mustDivide = false;
186   (void)mustDivide;
187   VectorType superVectorType;
188   if (auto read = dyn_cast<vector::TransferReadOp>(op)) {
189     superVectorType = read.getVectorType();
190     mustDivide = true;
191   } else if (auto write = dyn_cast<vector::TransferWriteOp>(op)) {
192     superVectorType = write.getVectorType();
193     mustDivide = true;
194   } else if (op.getNumResults() == 0) {
195     if (!isa<ReturnOp>(op)) {
196       op.emitError("NYI: assuming only return operations can have 0 "
197                    " results at this point");
198     }
199     return false;
200   } else if (op.getNumResults() == 1) {
201     if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) {
202       superVectorType = v;
203     } else {
204       // Not a vector type.
205       return false;
206     }
207   } else {
208     // Not a vector.transfer and has more than 1 result, fail hard for now to
209     // wake us up when something changes.
210     op.emitError("NYI: operation has more than 1 result");
211     return false;
212   }
213 
214   // Get the ratio.
215   auto ratio = shapeRatio(superVectorType, subVectorType);
216 
217   // Sanity check.
218   assert((ratio.hasValue() || !mustDivide) &&
219          "vector.transfer operation in which super-vector size is not an"
220          " integer multiple of sub-vector size");
221 
222   // This catches cases that are not strictly necessary to have multiplicity but
223   // still aren't divisible by the sub-vector shape.
224   // This could be useful information if we wanted to reshape at the level of
225   // the vector type (but we would have to look at the compute and distinguish
226   // between parallel, reduction and possibly other cases.
227   if (!ratio.hasValue()) {
228     return false;
229   }
230 
231   return true;
232 }
233