//===- VectorUtils.cpp - MLIR Utilities for VectorOps   ------------------===//
//
// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utility methods for working with the Vector dialect.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Support/MathExtras.h"
#include <numeric>

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"

using llvm::SetVector;

using namespace mlir;

/// Return the product of the sizes in `basis`, i.e. the total number of
/// elements addressable via `basis`, or `0` if `basis` is empty.
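///   e.g. basis [3, 4, 5] -> 3 * 4 * 5 = 60.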
int64_t mlir::computeMaxLinearIndex(ArrayRef<int64_t> basis) {
  if (basis.empty())
    return 0;
  // Accumulate in int64_t rather than the `int` deduced from a plain `1`
  // initial value, to avoid narrowing on large products.
  return std::accumulate(basis.begin(), basis.end(), int64_t(1),
                         std::multiplies<int64_t>());
}

/// Given a shape with sizes greater than 0 along all dimensions,
/// return the distance, in number of elements, between a slice in a dimension
/// and the next slice in the same dimension.
///   e.g. shape[3, 4, 5] -> linearization_basis[20, 5, 1]
SmallVector<int64_t, 8> mlir::computeStrides(ArrayRef<int64_t> shape) {
  if (shape.empty())
    return {};
  SmallVector<int64_t, 8> tmp;
  tmp.reserve(shape.size());
  int64_t running = 1;
  for (auto size : llvm::reverse(shape)) {
    assert(size > 0 && "size must be strictly positive");
    tmp.push_back(running);
    running *= size;
  }
  return SmallVector<int64_t, 8>(tmp.rbegin(), tmp.rend());
}

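/// Given a `shape` tiled into slices of size `sizes` along each dimension,
/// return the strides used to linearize/delinearize multi-dimensional slice
/// coordinates (counted in number of slices, not elements).
///   e.g. shape [4, 8] with sizes [2, 2] yields 2x4 slices and strides [4, 1].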
SmallVector<int64_t, 4> mlir::computeStrides(ArrayRef<int64_t> shape,
                                             ArrayRef<int64_t> sizes) {
  int64_t rank = shape.size();
  // Compute the count for each dimension.
  SmallVector<int64_t, 4> sliceDimCounts(rank);
  for (int64_t r = 0; r < rank; ++r)
    sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]);
  // Use that to compute the slice stride for each dimension.
  SmallVector<int64_t, 4> sliceStrides(rank);
  sliceStrides[rank - 1] = 1;
  for (int64_t r = rank - 2; r >= 0; --r)
    sliceStrides[r] = sliceStrides[r + 1] * sliceDimCounts[r + 1];
  return sliceStrides;
}

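/// Return the linear index corresponding to the multi-dimensional `offsets`
/// with respect to `basis`, i.e. the dot product of `offsets` and `basis`.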
int64_t mlir::linearize(ArrayRef<int64_t> offsets, ArrayRef<int64_t> basis) {
  assert(offsets.size() == basis.size());
  int64_t linearIndex = 0;
  for (unsigned idx = 0, e = basis.size(); idx < e; ++idx)
    linearIndex += offsets[idx] * basis[idx];
  return linearIndex;
}

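/// Inverse of `linearize`: given strictly positive `sliceStrides`, decompose
/// the linear `index` into per-dimension slice offsets.
///   e.g. sliceStrides [4, 1] and index 7 -> offsets [1, 3].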
SmallVector<int64_t, 4> mlir::delinearize(ArrayRef<int64_t> sliceStrides,
                                          int64_t index) {
  int64_t rank = sliceStrides.size();
  SmallVector<int64_t, 4> vectorOffsets(rank);
  for (int64_t r = 0; r < rank; ++r) {
    assert(sliceStrides[r] > 0);
    vectorOffsets[r] = index / sliceStrides[r];
    index %= sliceStrides[r];
  }
  return vectorOffsets;
}

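/// Scale the per-dimension slice offsets `vectorOffsets` by the slice `sizes`
/// to obtain offsets in number of elements, i.e.
/// result[i] = vectorOffsets[i] * sizes[i].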
SmallVector<int64_t, 4> mlir::computeElementOffsetsFromVectorSliceOffsets(
    ArrayRef<int64_t> sizes, ArrayRef<int64_t> vectorOffsets) {
  SmallVector<int64_t, 4> result;
  for (auto it : llvm::zip(vectorOffsets, sizes))
    result.push_back(std::get<0>(it) * std::get<1>(it));
  return result;
}

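/// Compute the actual sizes of the slice positioned at `elementOffsets`: the
/// requested `sizes`, clamped so the slice does not extend past the boundary
/// of `shape`.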
SmallVector<int64_t, 4>
mlir::computeSliceSizes(ArrayRef<int64_t> shape, ArrayRef<int64_t> sizes,
                        ArrayRef<int64_t> elementOffsets) {
  int64_t rank = shape.size();
  SmallVector<int64_t, 4> sliceSizes(rank);
  for (int64_t r = 0; r < rank; ++r)
    sliceSizes[r] = std::min(sizes[r], shape[r] - elementOffsets[r]);
  return sliceSizes;
}

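/// Return the multi-dimensional ratio of `superShape` to `subShape`, i.e. how
/// many sub-vectors fit along each dimension, or None if the sub-shape does
/// not divide the super-shape evenly. Extra leading super-vector dimensions
/// are carried over unchanged.
///   e.g. shapeRatio([8, 16], [4]) -> [8, 4].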
Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(ArrayRef<int64_t> superShape,
                                                   ArrayRef<int64_t> subShape) {
  if (superShape.size() < subShape.size()) {
    return Optional<SmallVector<int64_t, 4>>();
  }

  // Starting from the end, compute the integer divisors.
  std::vector<int64_t> result;
  result.reserve(superShape.size());
  int64_t superSize = 0, subSize = 0;
  for (auto it :
       llvm::zip(llvm::reverse(superShape), llvm::reverse(subShape))) {
    std::tie(superSize, subSize) = it;
    assert(superSize > 0 && "superSize must be > 0");
    assert(subSize > 0 && "subSize must be > 0");

    // If integral division does not occur, return and let the caller decide.
    if (superSize % subSize != 0)
      return None;
    result.push_back(superSize / subSize);
  }

  // At this point we computed the ratio (in reverse) for the common suffix of
  // the shapes. Fill with the remaining entries from the super-vector shape
  // (still in reverse).
  int commonSize = subShape.size();
  std::copy(superShape.rbegin() + commonSize, superShape.rend(),
            std::back_inserter(result));

  assert(result.size() == superShape.size() &&
         "super to sub shape ratio is not of the same size as the super rank");

  // Reverse again to get it back in the proper order and return.
  return SmallVector<int64_t, 4>{result.rbegin(), result.rend()};
}

Optional<SmallVector<int64_t, 4>> mlir::shapeRatio(VectorType superVectorType,
                                                   VectorType subVectorType) {
  assert(superVectorType.getElementType() == subVectorType.getElementType() &&
         "vector types must be of the same elemental type");
  return shapeRatio(superVectorType.getShape(), subVectorType.getShape());
}

/// Constructs a permutation map from memref indices to vector dimensions.
///
/// The implementation uses the knowledge of the mapping of enclosing loops to
/// vector dimensions. `enclosingLoopToVectorDim` carries this information as a
/// map with:
///   - keys representing "vectorized enclosing loops";
///   - values representing the corresponding vector dimension.
/// The algorithm traverses the "vectorized enclosing loops" and extracts the
/// at-most-one MemRef index that varies along (i.e. is not invariant to) each
/// such loop. By construction at most one such index exists: otherwise the
/// MemRef is not vectorizable.
/// If such a varying index is found, it is added to the permutation_map at the
/// proper vector dimension.
/// If all indices are invariant along the loop, 0 is added to the
/// permutation_map instead and corresponds to a vector broadcast along that
/// dimension.
///
/// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
/// signalling that no permutation map can be constructed given
/// `enclosingLoopToVectorDim`.
///
/// Examples can be found in the documentation of `makePermutationMap`, in the
/// header file.
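/// For instance, with indices (%i, %j) and the loop on %j vectorized to vector
/// dimension 0 (%i being invariant along that loop), the resulting map is
/// (d0, d1) -> (d1).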
static AffineMap makePermutationMap(
    ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
  if (enclosingLoopToVectorDim.empty())
    return AffineMap();
  MLIRContext *context =
      enclosingLoopToVectorDim.begin()->getFirst()->getContext();
  SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(),
                                  getAffineConstantExpr(0, context));

  for (auto kvp : enclosingLoopToVectorDim) {
    assert(kvp.second < perm.size());
    auto invariants = getInvariantAccesses(
        cast<AffineForOp>(kvp.first).getInductionVar(), indices);
    unsigned numIndices = indices.size();
    unsigned countInvariantIndices = 0;
    for (unsigned dim = 0; dim < numIndices; ++dim) {
      if (!invariants.count(indices[dim])) {
        assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
               "permutationMap already has an entry along dim");
        perm[kvp.second] = getAffineDimExpr(dim, context);
      } else {
        ++countInvariantIndices;
      }
    }
    assert((countInvariantIndices == numIndices ||
            countInvariantIndices == numIndices - 1) &&
           "Vectorization prerequisite violated: at most 1 index may vary "
           "wrt a vectorized loop");
  }
  return AffineMap::get(indices.size(), 0, perm, context);
}

/// Implementation detail that walks up the parents and records the ones with
/// the specified type.
/// TODO: could also be implemented as a collect parents followed by a
/// filter and made available outside this file.
template <typename T>
static SetVector<Operation *> getParentsOfType(Operation *op) {
  SetVector<Operation *> res;
  auto *current = op;
  while (auto *parent = current->getParentOp()) {
    if (auto typedParent = dyn_cast<T>(parent)) {
      assert(res.count(parent) == 0 && "Already inserted");
      res.insert(parent);
    }
    current = parent;
  }
  return res;
}

/// Returns the enclosing AffineForOps, from closest to farthest.
static SetVector<Operation *> getEnclosingforOps(Operation *op) {
  return getParentsOfType<AffineForOp>(op);
}

AffineMap mlir::makePermutationMap(
    Operation *op, ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
  DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
  auto enclosingLoops = getEnclosingforOps(op);
  for (auto *forInst : enclosingLoops) {
    auto it = loopToVectorDim.find(forInst);
    if (it != loopToVectorDim.end()) {
      enclosingLoopToVectorDim.insert(*it);
    }
  }
  return ::makePermutationMap(indices, enclosingLoopToVectorDim);
}

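/// Return the minor identity map for a transfer of `vectorType` on
/// `shapedType`. When the element type of `shapedType` is itself a vector, the
/// element vector's rank is subtracted from the number of map results, since
/// those trailing dimensions are carried by the element type, not the map.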
AffineMap mlir::getTransferMinorIdentityMap(ShapedType shapedType,
                                            VectorType vectorType) {
  int64_t elementVectorRank = 0;
  VectorType elementVectorType =
      shapedType.getElementType().dyn_cast<VectorType>();
  if (elementVectorType)
    elementVectorRank += elementVectorType.getRank();
  return AffineMap::getMinorIdentityMap(
      shapedType.getRank(), vectorType.getRank() - elementVectorRank,
      shapedType.getContext());
}

bool matcher::operatesOnSuperVectorsOf(Operation &op,
                                       VectorType subVectorType) {
  // First, extract the vector type and distinguish between:
  //   a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
  //      vector.transfer_write); and
  //   b. ops that *may* lower a super-vector (all other ops).
  // The ops that *may* lower a super-vector only do so if the super-vector to
  // sub-vector ratio exists. The ops that *must* lower a super-vector are
  // explicitly checked for this property.
  /// TODO: there should be a single function for all ops to do this so we
  /// do not have to special case. Maybe a trait, or just a method, unclear atm.
  bool mustDivide = false;
  (void)mustDivide;
  VectorType superVectorType;
  if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
    superVectorType = transfer.getVectorType();
    mustDivide = true;
  } else if (op.getNumResults() == 0) {
    if (!isa<ReturnOp>(op)) {
      op.emitError("NYI: assuming only return operations can have 0 "
                   "results at this point");
    }
    return false;
  } else if (op.getNumResults() == 1) {
    if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) {
      superVectorType = v;
    } else {
      // Not a vector type.
      return false;
    }
  } else {
    // Not a vector.transfer and has more than 1 result, fail hard for now to
    // wake us up when something changes.
    op.emitError("NYI: operation has more than 1 result");
    return false;
  }

  // Get the ratio.
  auto ratio = shapeRatio(superVectorType, subVectorType);

  // Sanity check.
  assert((ratio.hasValue() || !mustDivide) &&
         "vector.transfer operation in which super-vector size is not an"
         " integer multiple of sub-vector size");

  // This catches the cases where divisibility is not strictly required
  // (mustDivide is false) but the super-vector still is not divisible by the
  // sub-vector shape.
  // This could be useful information if we wanted to reshape at the level of
  // the vector type (but we would have to look at the compute and distinguish
  // between parallel, reduction and possibly other cases).
  return ratio.hasValue();
}

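/// Return true if we can prove, from their constant indices, that the two
/// transfer operations access disjoint memory.
///   e.g. two transfers of vector<4xf32> whose constant indices along the
///   accessed dimension differ by at least 4 cannot overlap.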
bool mlir::isDisjointTransferIndices(VectorTransferOpInterface transferA,
                                     VectorTransferOpInterface transferB) {
  // For simplicity only look at transfers of the same type.
  if (transferA.getVectorType() != transferB.getVectorType())
    return false;
  unsigned rankOffset = transferA.getLeadingShapedRank();
  for (unsigned i = 0, e = transferA.indices().size(); i < e; i++) {
    auto indexA = transferA.indices()[i].getDefiningOp<ConstantOp>();
    auto indexB = transferB.indices()[i].getDefiningOp<ConstantOp>();
    // If any of the indices are dynamic we cannot prove anything.
    if (!indexA || !indexB)
      continue;

    if (i < rankOffset) {
      // For leading dimensions, if we can prove that the indices are different
      // we know we are accessing disjoint slices.
      if (indexA.getValue().cast<IntegerAttr>().getInt() !=
          indexB.getValue().cast<IntegerAttr>().getInt())
        return true;
    } else {
      // For this dimension, we slice a part of the memref, so we need to make
      // sure the intervals accessed don't overlap.
      int64_t distance =
          std::abs(indexA.getValue().cast<IntegerAttr>().getInt() -
                   indexB.getValue().cast<IntegerAttr>().getInt());
      if (distance >= transferA.getVectorType().getDimSize(i - rankOffset))
        return true;
    }
  }
  return false;
}

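/// Return true if the two transfers provably access disjoint memory: they must
/// read from/write to the same source and have disjoint indices.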
bool mlir::isDisjointTransferSet(VectorTransferOpInterface transferA,
                                 VectorTransferOpInterface transferB) {
  if (transferA.source() != transferB.source())
    return false;
  return isDisjointTransferIndices(transferA, transferB);
}