1//===- VectorOps.td - Vector op definitions ---------------*- tablegen -*-====//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Defines MLIR vector operations.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef VECTOR_OPS
14#define VECTOR_OPS
15
16include "mlir/Interfaces/SideEffectInterfaces.td"
17include "mlir/Interfaces/VectorInterfaces.td"
18include "mlir/Interfaces/ViewLikeInterface.td"
19
20def Vector_Dialect : Dialect {
21  let name = "vector";
22  let cppNamespace = "::mlir::vector";
23  let hasConstantMaterializer = 1;
24}
25
26// Base class for Vector dialect ops.
27class Vector_Op<string mnemonic, list<OpTrait> traits = []> :
28    Op<Vector_Dialect, mnemonic, traits> {
29  // For every vector op, there needs to be a:
30  //   * void print(OpAsmPrinter &p, ${C++ class of Op} op)
31  //   * LogicalResult verify(${C++ class of Op} op)
32  //   * ParseResult parse${C++ class of Op}(OpAsmParser &parser,
33  //                                         OperationState &result)
34  // functions.
35  let printer = [{ return ::print(p, *this); }];
36  let verifier = [{ return ::verify(*this); }];
37  let parser = [{ return ::parse$cppClass(parser, result); }];
38}
39
40// The "kind" of combining function for contractions and reductions.
41def COMBINING_KIND_ADD : BitEnumAttrCase<"ADD", 0x1,  "add">;
42def COMBINING_KIND_MUL : BitEnumAttrCase<"MUL", 0x2,  "mul">;
43def COMBINING_KIND_MIN : BitEnumAttrCase<"MIN", 0x4,  "min">;
44def COMBINING_KIND_MAX : BitEnumAttrCase<"MAX", 0x8,  "max">;
45def COMBINING_KIND_AND : BitEnumAttrCase<"AND", 0x10, "and">;
46def COMBINING_KIND_OR  : BitEnumAttrCase<"OR",  0x20, "or">;
47def COMBINING_KIND_XOR : BitEnumAttrCase<"XOR", 0x40, "xor">;
48
49def CombiningKind : BitEnumAttr<
50    "CombiningKind",
51    "Kind of combining function for contractions and reductions",
52    [COMBINING_KIND_ADD, COMBINING_KIND_MUL, COMBINING_KIND_MIN,
53     COMBINING_KIND_MAX, COMBINING_KIND_AND, COMBINING_KIND_OR,
54     COMBINING_KIND_XOR]> {
55  let cppNamespace = "::mlir::vector";
56  let genSpecializedAttr = 0;
57}
58
59def Vector_CombiningKindAttr : DialectAttr<
60    Vector_Dialect,
61    CPred<"$_self.isa<::mlir::vector::CombiningKindAttr>()">,
62    "Kind of combining function for contractions and reductions"> {
63  let storageType = "::mlir::vector::CombiningKindAttr";
64  let returnType = "::mlir::vector::CombiningKind";
65  let convertFromStorage = "$_self.getKind()";
66  let constBuilderCall =
67          "::mlir::vector::CombiningKindAttr::get($0, $_builder.getContext())";
68}
69
70// TODO: Add an attribute to specify a different algebra with operators other
71// than the current set: {*, +}.
72def Vector_ContractionOp :
73  Vector_Op<"contract", [
74      NoSideEffect,
75      PredOpTrait<"lhs and rhs have same element type", TCopVTEtIsSameAs<0, 1>>,
76      PredOpTrait<"third operand acc and result have same element type",
77                  TCresVTEtIsSameAsOpBase<0, 2>>,
78      DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
79    ]>,
80    Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyType:$acc,
81               Variadic<VectorOf<[I1]>>:$masks,
82               AffineMapArrayAttr:$indexing_maps, ArrayAttr:$iterator_types,
83               DefaultValuedAttr<Vector_CombiningKindAttr,
84                                 "CombiningKind::ADD">:$kind)>,
85    Results<(outs AnyType)> {
86  let summary = "vector contraction operation";
87  let description = [{
88    Computes the sum of products of vector elements along contracting
89    dimension pairs from 2 vectors of rank M and N respectively, adds this
90    intermediate result to the accumulator argument of rank K, and returns a
91    vector result of rank K (where K = num_lhs_free_dims + num_rhs_free_dims +
92    num_batch_dims (see dimension type descriptions below)). For K = 0 (no
93    free or batch dimensions), the accumulator and output are a scalar.
94
95    Optional vector mask arguments (produced by CreateMaskOp or ConstantMaskOp)
96    specify the dynamic dimension sizes of valid data within the lhs/rhs vector
97    arguments.
98
99    An iterator type attribute list must be specified, where each element of
100    the list represents an iterator with one of the following types:
101
102    *) "reduction": reduction dimensions are present in the lhs and rhs
103                    arguments but not in the output (and accumulator
104                    argument). These are the dimensions along which the vector
105                    contraction op computes the sum of products, and
106                    contracting dimension pair dimension sizes must match
107                    between lhs/rhs.
108    *) "parallel": Batch dimensions are iterator type "parallel", and
109                   are non-contracting dimensions present in the lhs, rhs and
110                   output. The lhs/rhs co-iterate along the batch dimensions,
111                   which should be expressed in their indexing maps.
112
113                   Free dimensions are iterator type "parallel", and are
114                   non-contraction, non-batch dimensions accessed by either the
115                   lhs or rhs (but not both). The lhs and rhs free dimensions
116                   are unrelated to each other and do not co-iterate, which
117                   should be expressed in their indexing maps.
118
119    An indexing map attribute list must be specified with an entry for lhs, rhs
120    and acc arguments. An indexing map attribute specifies a mapping from each
121    iterator in the iterator type list, to each dimension of an N-D vector.
122
123    An optional kind attribute may be used to specify the combining function
124    between the intermediate result and accumulator argument of rank K. This
125    attribute can take the values add/mul/min/max for int/fp, and/or/xor for
126    int only. The default is "add".
127
128    Example:
129
130    ```mlir
131    // Simple DOT product (K = 0).
132    #contraction_accesses = [
133     affine_map<(i) -> (i)>,
134     affine_map<(i) -> (i)>,
135     affine_map<(i) -> ()>
136    ]
137    #contraction_trait = {
138      indexing_maps = #contraction_accesses,
139      iterator_types = ["reduction"]
140    }
141    %3 = vector.contract #contraction_trait %0, %1, %2
142      : vector<10xf32>, vector<10xf32> into f32
143
144    // 2D vector contraction with one contracting dimension (matmul, K = 2).
145    #contraction_accesses = [
146      affine_map<(i, j, k) -> (i, k)>,
147      affine_map<(i, j, k) -> (k, j)>,
148      affine_map<(i, j, k) -> (i, j)>
149    ]
150    #contraction_trait = {
151      indexing_maps = #contraction_accesses,
152      iterator_types = ["parallel", "parallel", "reduction"]
153    }
154
155    %3 = vector.contract #contraction_trait %0, %1, %2
156      : vector<4x3xf32>, vector<3x7xf32> into vector<4x7xf32>
157
158    // 4D to 3D vector contraction with two contracting dimensions and
159    // one batch dimension (K = 3).
160    #contraction_accesses = [
161      affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>,
162      affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>,
163      affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)>
164    ]
165    #contraction_trait = {
166      indexing_maps = #contraction_accesses,
167      iterator_types = ["parallel", "parallel", "parallel",
168                        "reduction", "reduction"]
169    }
170
171    %4 = vector.contract #contraction_trait %0, %1, %2
172        : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>
173
174    // 4D vector contraction with two contracting dimensions and optional
175    // vector mask arguments.
176    %lhs_mask = vector.constant_mask [7, 8, 16, 15] : vector<7x8x16x15xi1>
177    %rhs_mask = vector.constant_mask [8, 16, 7, 5] : vector<8x16x7x5xi1>
178
179    %5 = vector.contract #contraction_trait %0, %1, %2, %lhs_mask, %rhs_mask
180       : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x8x5xf32>
181
182    // Vector contraction with mixed typed. lhs/rhs have different element
183    // types than accumulator/result.
184    %6 = vector.contract #contraction_trait %0, %1, %2
185      : vector<10xf16>, vector<10xf16> into f32
186
187    // Contract with max (K = 0).
188    #contraction_accesses = [
189     affine_map<(i) -> (i)>,
190     affine_map<(i) -> (i)>,
191     affine_map<(i) -> ()>
192    ]
193    #contraction_trait = {
194      indexing_maps = #contraction_accesses,
195      iterator_types = ["reduction"],
196      kind = #vector.kind<max>
197    }
198    %7 = vector.contract #contraction_trait %0, %1, %2
199      : vector<10xf32>, vector<10xf32> into f32
200    ```
201  }];
202  let builders = [
203    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc,
204      "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes)>,
205    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc,
206      "ArrayRef<ArrayRef<AffineExpr>>":$indexingExprs,
207      "ArrayRef<StringRef>":$iteratorTypes)>
208  ];
209  let extraClassDeclaration = [{
210    VectorType getLhsType() {
211      return lhs().getType().cast<VectorType>();
212    }
213    VectorType getRhsType() {
214      return rhs().getType().cast<VectorType>();
215    }
216    Type getAccType() { return acc().getType(); }
217    VectorType getLHSVectorMaskType() {
218      if (llvm::size(masks()) != 2) return VectorType();
219      return getOperand(3).getType().cast<VectorType>();
220    }
221    VectorType getRHSVectorMaskType() {
222      if (llvm::size(masks()) != 2) return VectorType();
223      return getOperand(4).getType().cast<VectorType>();
224    }
225    Type getResultType() { return getResult().getType(); }
226    ArrayRef<StringRef> getTraitAttrNames();
227    SmallVector<AffineMap, 4> getIndexingMaps();
228    static unsigned getAccOperandIndex() { return 2; }
229
230    // Returns the bounds of each dimension in the iteration space spanned
231    // by the iterator types of this operation.
232    void getIterationBounds(SmallVectorImpl<int64_t> &iterationBounds);
233
234    // Returns a list of index maps, where there is a list entry for each
235    // op indexing map attribute (i.e. one for each input and output, with
236    // the output listed last). Each index map, maps from this operations
237    // iteration space, to vector dimensions of the maps input/output.
238    void getIterationIndexMap(
239      std::vector<DenseMap<int64_t, int64_t>> &iterationIndexMap);
240
241    std::vector<std::pair<int64_t, int64_t>> getContractingDimMap();
242    std::vector<std::pair<int64_t, int64_t>> getBatchDimMap();
243
244    static constexpr StringRef getKindAttrName() { return "kind"; }
245
246    static CombiningKind getDefaultKind() {
247      return CombiningKind::ADD;
248    }
249  }];
250
251  let hasCanonicalizer = 1;
252}
253
254def Vector_ReductionOp :
255  Vector_Op<"reduction", [NoSideEffect,
256     PredOpTrait<"source operand and result have same element type",
257                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
258    Arguments<(ins StrAttr:$kind, AnyVector:$vector, Variadic<AnyType>:$acc)>,
259    Results<(outs AnyType:$dest)> {
260  let summary = "reduction operation";
261  let description = [{
262    Reduces an 1-D vector "horizontally" into a scalar using the given
263    operation (add/mul/min/max for int/fp and and/or/xor for int only).
264    Some reductions (add/mul for fp) also allow an optional fused
265    accumulator.
266
267    Note that these operations are restricted to 1-D vectors to remain
268    close to the corresponding LLVM intrinsics:
269
270    http://llvm.org/docs/LangRef.html#vector-reduction-intrinsics
271
272    Example:
273
274    ```mlir
275    %1 = vector.reduction "add", %0 : vector<16xf32> into f32
276
277    %3 = vector.reduction "xor", %2 : vector<4xi32> into i32
278
279    %4 = vector.reduction "mul", %0, %1 : vector<16xf32> into f32
280    ```
281  }];
282  let extraClassDeclaration = [{
283    VectorType getVectorType() {
284      return vector().getType().cast<VectorType>();
285    }
286  }];
287}
288
289def Vector_MultiDimReductionOp :
290  Vector_Op<"multi_reduction", [NoSideEffect,
291     PredOpTrait<"source operand and result have same element type",
292                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
293    Arguments<(ins Vector_CombiningKindAttr:$kind,
294                   AnyVector:$source,
295                   I64ArrayAttr:$reduction_dims)>,
296    Results<(outs AnyType:$dest)> {
297  let summary = "Multi-dimensional reduction operation";
298  let description = [{
299    Reduces an n-D vector into an (n-k)-D vector using the given operation
300    (add/mul/min/max for int/fp and and/or/xor for int only).
301
302    Example:
303
304    ```mlir
305    %1 = vector.multi_reduction "add", %0 [1, 3] :
306      vector<4x8x16x32xf32> into vector<4x16xf32>
307    ```
308  }];
309  let builders = [
310    OpBuilder<(ins "Value":$source, "ArrayRef<bool>":$reductionMask,
311                   "CombiningKind":$kind)>
312  ];
313  let extraClassDeclaration = [{
314    static StringRef getKindAttrName() { return "kind"; }
315    static StringRef getReductionDimsAttrName() { return "reduction_dims"; }
316
317    VectorType getSourceVectorType() {
318      return source().getType().cast<VectorType>();
319    }
320    VectorType getDestVectorType() {
321      return dest().getType().cast<VectorType>();
322    }
323
324    SmallVector<bool> getReductionMask() {
325      SmallVector<bool> res(getSourceVectorType().getRank(), false);
326      for (auto ia : reduction_dims().getAsRange<IntegerAttr>())
327        res[ia.getInt()] = true;
328      return res;
329    }
330    static SmallVector<bool> getReductionMask(
331        ArrayRef<int64_t> reductionDims, unsigned sourceRank) {
332      SmallVector<bool> res(sourceRank, false);
333      for (auto idx : reductionDims)
334        res[idx] = true;
335      return res;
336    }
337
338    static SmallVector<int64_t> inferDestShape(
339      ArrayRef<int64_t> shape, ArrayRef<bool> reducedDimsMask) {
340      assert(shape.size() == reducedDimsMask.size() &&
341             "shape and maks of different sizes");
342      SmallVector<int64_t> res;
343      for (auto it : llvm::zip(reducedDimsMask, shape))
344        if (!std::get<0>(it))
345          res.push_back(std::get<1>(it));
346      return res;
347    }
348  }];
349  let assemblyFormat =
350    "$kind `,` $source attr-dict $reduction_dims `:` type($source) `to` type($dest)";
351}
352
353def Vector_BroadcastOp :
354  Vector_Op<"broadcast", [NoSideEffect,
355     PredOpTrait<"source operand and result have same element type",
356                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
357    Arguments<(ins AnyType:$source)>,
358    Results<(outs AnyVector:$vector)> {
359  let summary = "broadcast operation";
360  let description = [{
361    Broadcasts the scalar or k-D vector value in the source operand
362    to a n-D result vector such that the broadcast makes sense, i.e.,
363    the source operand is duplicated to match the given rank and sizes
364    in the result vector. The legality rules are:
365    * the source operand must have the same element type as the result type
366    * a k-D vector <s_1 x .. x s_k x type> can be broadcast to
367      a n-D vector <t_1 x .. x t_n x type> if
368       * k <= n, and
369       * the sizes in the trailing dimensions n-k < i <= n with j=i+k-n
370          match exactly as s_j = t_i or s_j = 1:
371       ```
372           t_1 x   ..  t_n-k x t_n-k+1 x .. x t_i x .. x t_n
373                               s_1     x .. x s_j x .. x s_k
374               <duplication>         <potential stretch>
375       ```
376    The source operand is duplicated over all the missing leading dimensions
377    and stretched over the trailing dimensions where the source has a non-equal
378    dimension of 1. These rules imply that any scalar broadcast (k=0) to any
379    shaped vector with the same element type is always legal.
380
381    Example:
382
383    ```mlir
384    %0 = constant 0.0 : f32
385    %1 = vector.broadcast %0 : f32 to vector<16xf32>
386    %2 = vector.broadcast %1 : vector<16xf32> to vector<4x16xf32>
387    ```
388  }];
389  let extraClassDeclaration = [{
390    Type getSourceType() { return source().getType(); }
391    VectorType getVectorType() {
392      return vector().getType().cast<VectorType>();
393    }
394  }];
395  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($vector)";
396  let hasFolder = 1;
397  let hasCanonicalizer = 1;
398}
399
400def Vector_ShuffleOp :
401  Vector_Op<"shuffle", [NoSideEffect,
402     PredOpTrait<"first operand v1 and result have same element type",
403                 TCresVTEtIsSameAsOpBase<0, 0>>,
404     PredOpTrait<"second operand v2 and result have same element type",
405                 TCresVTEtIsSameAsOpBase<0, 1>>]>,
406     Arguments<(ins AnyVector:$v1, AnyVector:$v2, I64ArrayAttr:$mask)>,
407     Results<(outs AnyVector:$vector)> {
408  let summary = "shuffle operation";
409  let description = [{
410    The shuffle operation constructs a permutation (or duplication) of elements
411    from two input vectors, returning a vector with the same element type as
412    the input and a length that is the same as the shuffle mask. The two input
413    vectors must have the same element type, rank, and trailing dimension sizes
414    and shuffles their values in the leading dimension (which may differ in size)
415    according to the given mask. The legality rules are:
416    * the two operands must have the same element type as the result
417    * the two operands and the result must have the same rank and trailing
418      dimension sizes, viz. given two k-D operands
419              v1 : <s_1 x s_2 x .. x s_k x type> and
420              v2 : <t_1 x t_2 x .. x t_k x type>
421      we have s_i = t_i for all 1 < i <= k
422    * the mask length equals the leading dimension size of the result
423    * numbering the input vector indices left to right across the operands, all
424      mask values must be within range, viz. given two k-D operands v1 and v2
425      above, all mask values are in the range [0,s_1+t_1)
426
427    Example:
428
429    ```mlir
430    %0 = vector.shuffle %a, %b[0, 3]
431               : vector<2xf32>, vector<2xf32>       ; yields vector<2xf32>
432    %1 = vector.shuffle %c, %b[0, 1, 2]
433               : vector<2x16xf32>, vector<1x16xf32> ; yields vector<3x16xf32>
434    %2 = vector.shuffle %a, %b[3, 2, 1, 0]
435               : vector<2xf32>, vector<2xf32>       ; yields vector<4xf32>
436    ```
437  }];
438  let builders = [
439    OpBuilder<(ins "Value":$v1, "Value":$v2, "ArrayRef<int64_t>")>
440  ];
441  let extraClassDeclaration = [{
442    static StringRef getMaskAttrName() { return "mask"; }
443    VectorType getV1VectorType() {
444      return v1().getType().cast<VectorType>();
445    }
446    VectorType getV2VectorType() {
447      return v2().getType().cast<VectorType>();
448    }
449    VectorType getVectorType() {
450      return vector().getType().cast<VectorType>();
451    }
452  }];
453}
454
455def Vector_ExtractElementOp :
456  Vector_Op<"extractelement", [NoSideEffect,
457     TypesMatchWith<"result type matches element type of vector operand",
458                    "vector", "result",
459                    "$_self.cast<ShapedType>().getElementType()">]>,
460    Arguments<(ins AnyVector:$vector, AnySignlessInteger:$position)>,
461    Results<(outs AnyType:$result)> {
462  let summary = "extractelement operation";
463  let description = [{
464    Takes an 1-D vector and a dynamic index position and extracts the
465    scalar at that position. Note that this instruction resembles
466    vector.extract, but is restricted to 1-D vectors and relaxed
467    to dynamic indices. It is meant to be closer to LLVM's version:
468    https://llvm.org/docs/LangRef.html#extractelement-instruction
469
470    Example:
471
472    ```mlir
473    %c = constant 15 : i32
474    %1 = vector.extractelement %0[%c : i32]: vector<16xf32>
475    ```
476  }];
477  let assemblyFormat = [{
478    $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector)
479  }];
480
481  let builders = [
482    OpBuilder<(ins "Value":$source, "int64_t":$position)>,
483    OpBuilder<(ins "Value":$source, "Value":$position)>
484  ];
485  let extraClassDeclaration = [{
486    VectorType getVectorType() {
487      return vector().getType().cast<VectorType>();
488    }
489  }];
490}
491
492def Vector_ExtractOp :
493  Vector_Op<"extract", [NoSideEffect,
494     PredOpTrait<"operand and result have same element type",
495                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
496    Arguments<(ins AnyVector:$vector, I64ArrayAttr:$position)>,
497    Results<(outs AnyType)> {
498  let summary = "extract operation";
499  let description = [{
500    Takes an n-D vector and a k-D position and extracts the (n-k)-D vector at
501    the proper position. Degenerates to an element type in the 0-D case.
502
503    Example:
504
505    ```mlir
506    %1 = vector.extract %0[3]: vector<4x8x16xf32>
507    %2 = vector.extract %0[3, 3, 3]: vector<4x8x16xf32>
508    ```
509  }];
510  let builders = [
511    OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$position)>,
512    // Convenience builder which assumes the values in `position` are defined by
513    // ConstantIndexOp.
514    OpBuilder<(ins "Value":$source, "ValueRange":$position)>
515  ];
516  let extraClassDeclaration = [{
517    static StringRef getPositionAttrName() { return "position"; }
518    VectorType getVectorType() {
519      return vector().getType().cast<VectorType>();
520    }
521  }];
522  let hasCanonicalizer = 1;
523  let hasFolder = 1;
524}
525
526def Vector_ExtractMapOp :
527  Vector_Op<"extract_map", [NoSideEffect]>,
528    Arguments<(ins AnyVector:$vector, Variadic<Index>:$ids)>,
529    Results<(outs AnyVector)> {
530  let summary = "vector extract map operation";
531  let description = [{
532    Takes an N-D vector and extracts a sub-part of the vector starting at id
533    along each dimension.
534
535    The dimension associated to each element of `ids` used to extract are
536    implicitly deduced from the destination type. For each dimension the
537    multiplicity is the destination dimension size divided by the source
538    dimension size, each dimension with a multiplicity greater than 1 is
539    associated to the next id, following ids order.
540    For example if the source type is `vector<64x4x32xf32>` and the destination
541    type is `vector<4x4x2xf32>`, the first id maps to dimension 0 and the second
542    id to dimension 2.
543
544    Similarly to vector.tuple_get, this operation is used for progressive
545    lowering and should be folded away before converting to LLVM.
546
547    It is different than `vector.extract_slice` and
548    `vector.extract_strided_slice` as it takes a Value as index instead of an
549    attribute. Also in the future it is meant to support extracting along any
550    dimensions and not only the most major ones.
551
552    For instance:
553    ```
554    // dynamic computation producing the value 0 of index type
555    %idx0 = ... : index
556    // dynamic computation producing the value 1 of index type
557    %idx1 = ... : index
558    %0 = constant dense<0, 1, 2, 3>: vector<4xi32>
559    // extracts values [0, 1]
560    %1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
561    // extracts values [1, 2]
562    %2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
563    ```
564
565    Example:
566
567    ```mlir
568    %ev = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
569    %ev1 = vector.extract_map %v1[%id1, %id2] : vector<64x4x32xf32>
570      to vector<4x4x2xf32>
571    ```
572  }];
573  let builders = [
574    OpBuilder<(ins "Value":$vector, "ValueRange":$ids,
575                  "ArrayRef<int64_t>":$multiplicity,
576                  "AffineMap":$map)>];
577  let extraClassDeclaration = [{
578    VectorType getSourceVectorType() {
579      return vector().getType().cast<VectorType>();
580    }
581    VectorType getResultType() {
582      return getResult().getType().cast<VectorType>();
583    }
584    void getMultiplicity(SmallVectorImpl<int64_t> &multiplicity);
585    AffineMap map();
586  }];
587  let assemblyFormat = [{
588    $vector `[` $ids `]` attr-dict `:` type($vector) `to` type(results)
589  }];
590
591  let hasFolder = 1;
592}
593
594def Vector_FMAOp :
595  Op<Vector_Dialect, "fma", [
596       NoSideEffect, AllTypesMatch<["lhs", "rhs", "acc", "result"]>,
597       DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
598     ] # ElementwiseMappable.traits>,
599    Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyVector:$acc)>,
600    Results<(outs AnyVector:$result)> {
601  let summary = "vector fused multiply-add";
602  let description = [{
603    Multiply-add expressions operate on n-D vectors and compute a fused
604    pointwise multiply-and-accumulate: `$result = `$lhs * $rhs + $acc`.
605    All operands and result have the same vector type. The semantics
606    of the operation correspond to those of the `llvm.fma`
607    [intrinsic](https://llvm.org/docs/LangRef.html#int-fma). In the
608    particular case of lowering to LLVM, this is guaranteed to lower
609    to the `llvm.fma.*` intrinsic.
610
611    Example:
612
613    ```mlir
614    %3 = vector.fma %0, %1, %2: vector<8x16xf32>
615    ```
616  }];
617  // Fully specified by traits.
618  let verifier = ?;
619  let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)";
620  let builders = [
621    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc),
622    [{build($_builder, $_state, lhs.getType(), lhs, rhs, acc);}]>
623  ];
624  let extraClassDeclaration = [{
625    VectorType getVectorType() { return lhs().getType().cast<VectorType>(); }
626  }];
627}
628
629def Vector_InsertElementOp :
630  Vector_Op<"insertelement", [NoSideEffect,
631     TypesMatchWith<"source operand type matches element type of result",
632                    "result", "source",
633                    "$_self.cast<ShapedType>().getElementType()">,
634     AllTypesMatch<["dest", "result"]>]>,
635     Arguments<(ins AnyType:$source, AnyVector:$dest,
636                    AnySignlessInteger:$position)>,
637     Results<(outs AnyVector:$result)> {
638  let summary = "insertelement operation";
639  let description = [{
640    Takes a scalar source, an 1-D destination vector and a dynamic index
641    position and inserts the source into the destination at the proper
642    position.  Note that this instruction resembles vector.insert, but
643    is restricted to 1-D vectors and relaxed to dynamic indices. It is
644    meant to be closer to LLVM's version:
645    https://llvm.org/docs/LangRef.html#insertelement-instruction
646
647    Example:
648
649    ```mlir
650    %c = constant 15 : i32
651    %f = constant 0.0f : f32
652    %1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32>
653    ```
654  }];
655  let assemblyFormat = [{
656    $source `,` $dest `[` $position `:` type($position) `]` attr-dict `:`
657    type($result)
658  }];
659
660  let builders = [
661    OpBuilder<(ins "Value":$source, "Value":$dest, "int64_t":$position)>,
662    OpBuilder<(ins "Value":$source, "Value":$dest, "Value":$position)>
663  ];
664  let extraClassDeclaration = [{
665    Type getSourceType() { return source().getType(); }
666    VectorType getDestVectorType() {
667      return dest().getType().cast<VectorType>();
668    }
669  }];
670
671}
672
673def Vector_InsertOp :
674  Vector_Op<"insert", [NoSideEffect,
675     PredOpTrait<"source operand and result have same element type",
676                 TCresVTEtIsSameAsOpBase<0, 0>>,
677     AllTypesMatch<["dest", "res"]>]>,
678     Arguments<(ins AnyType:$source, AnyVector:$dest, I64ArrayAttr:$position)>,
679     Results<(outs AnyVector:$res)> {
680  let summary = "insert operation";
681  let description = [{
682    Takes an n-D source vector, an (n+k)-D destination vector and a k-D position
683    and inserts the n-D source into the (n+k)-D destination at the proper
684    position. Degenerates to a scalar source type when n = 0.
685
686    Example:
687
688    ```mlir
689    %2 = vector.insert %0, %1[3] : vector<8x16xf32> into vector<4x8x16xf32>
690    %5 = vector.insert %3, %4[3, 3, 3] : f32 into vector<4x8x16xf32>
691    ```
692  }];
693  let assemblyFormat = [{
694    $source `,` $dest $position attr-dict `:` type($source) `into` type($dest)
695  }];
696
697  let builders = [
698    OpBuilder<(ins "Value":$source, "Value":$dest,
699      "ArrayRef<int64_t>":$position)>,
700    // Convenience builder which assumes all values are constant indices.
701    OpBuilder<(ins "Value":$source, "Value":$dest, "ValueRange":$position)>
702  ];
703  let extraClassDeclaration = [{
704    static StringRef getPositionAttrName() { return "position"; }
705    Type getSourceType() { return source().getType(); }
706    VectorType getDestVectorType() {
707      return dest().getType().cast<VectorType>();
708    }
709  }];
710
711  let hasCanonicalizer = 1;
712  let hasFolder = 1;
713}
714
715def Vector_InsertMapOp :
716  Vector_Op<"insert_map", [NoSideEffect, AllTypesMatch<["dest", "result"]>]>,
717    Arguments<(ins AnyVector:$vector, AnyVector:$dest, Variadic<Index>:$ids)>,
718    Results<(outs AnyVector:$result)> {
719  let summary = "vector insert map operation";
720  let description = [{
721    Inserts a N-D vector and within a larger vector starting at id. The new
722    vector created will have the same size as the destination operand vector.
723
724    The dimension associated to each element of `ids` used to insert is
725    implicitly deduced from the source type (see `ExtractMapOp` for details).
726    For example if source type is `vector<4x4x2xf32>` and the destination type
727    is `vector<64x4x32xf32>`, the first id maps to dimension 0 and the second id
728    to dimension 2.
729
730    Similarly to vector.tuple_get, this operation is used for progressive
731    lowering and should be folded away before converting to LLVM.
732
733    It is different than `vector.insert` and `vector.insert_strided_slice` as it
734    takes a Value as index instead of an attribute. Also in the future it is
735    meant to support inserting along any dimensions and not only the most major
736    ones.
737
738    This operations is meant to be used in combination with vector.extract_map.
739
740    For instance:
741    ```
742    // dynamic computation producing the value 0 of index type
743    %idx0 = ... : index
744    // dynamic computation producing the value 1 of index type
745    %idx1 = ... : index /
746    %0 = constant dense<0, 1, 2, 3>: vector<4xi32>
747    // extracts values [0, 1]
748    %1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
749    // extracts values [1, 2]
750    %2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
751    // insert [0, 1] into [x, x, x, x] and produce [0, 1, x, x]
752    %3 = vector.insert_map %1, %0[%idx0] : vector<2xi32> into vector<4xi32>
753    // insert [1, 2] into [x, x, x, x] and produce [x, 1, 2, x]
754    %4 = vector.insert_map %2, %0[%idx1] : vector<2xi32> into vector<4xi32>
755    ```
756    Example:
757
758    ```mlir
759    %v = vector.insert_map %ev %v[%id] : vector<1xf32> into vector<32xf32>
760    %v1 = vector.insert_map %ev1, %v1[%arg0, %arg1] : vector<2x4x1xf32>
761      into vector<64x4x32xf32>
762    ```
763  }];
764  let builders = [OpBuilder<(ins "Value":$vector, "Value":$dest,
765                                "ValueRange":$ids)>];
766  let extraClassDeclaration = [{
767    VectorType getSourceVectorType() {
768      return vector().getType().cast<VectorType>();
769    }
770    VectorType getResultType() {
771      return getResult().getType().cast<VectorType>();
772    }
773    // Return a map indicating the dimension mapping to the given ids.
774    AffineMap map();
775  }];
776  let assemblyFormat = [{
777    $vector `,` $dest `[` $ids `]` attr-dict
778      `:` type($vector) `into` type($result)
779  }];
780}
781
782def Vector_InsertStridedSliceOp :
783  Vector_Op<"insert_strided_slice", [NoSideEffect,
784    PredOpTrait<"operand #0 and result have same element type",
785                 TCresVTEtIsSameAsOpBase<0, 0>>,
786    AllTypesMatch<["dest", "res"]>]>,
787    Arguments<(ins AnyVector:$source, AnyVector:$dest, I64ArrayAttr:$offsets,
788               I64ArrayAttr:$strides)>,
789    Results<(outs AnyVector:$res)> {
790  let summary = "strided_slice operation";
791  let description = [{
792    Takes a k-D source vector, an n-D destination vector (n >= k), n-sized
793    `offsets` integer array attribute, a k-sized `strides` integer array attribute
794    and inserts the k-D source vector as a strided subvector at the proper offset
795    into the n-D destination vector.
796
797    At the moment strides must contain only 1s.
798
799    Returns an n-D vector that is a copy of the n-D destination vector in which
800    the last k-D dimensions contain the k-D source vector elements strided at
801    the proper location as specified by the offsets.
802
803    Example:
804
805    ```mlir
806    %2 = vector.insert_strided_slice %0, %1
807        {offsets = [0, 0, 2], strides = [1, 1]}:
808      vector<2x4xf32> into vector<16x4x8xf32>
809    ```
810  }];
811
812  let assemblyFormat = [{
813    $source `,` $dest attr-dict `:` type($source) `into` type($dest)
814  }];
815
816  let builders = [
817    OpBuilder<(ins "Value":$source, "Value":$dest,
818      "ArrayRef<int64_t>":$offsets, "ArrayRef<int64_t>":$strides)>
819  ];
820  let extraClassDeclaration = [{
821    static StringRef getOffsetsAttrName() { return "offsets"; }
822    static StringRef getStridesAttrName() { return "strides"; }
823    VectorType getSourceVectorType() {
824      return source().getType().cast<VectorType>();
825    }
826    VectorType getDestVectorType() {
827      return dest().getType().cast<VectorType>();
828    }
829  }];
830}
831
832def Vector_OuterProductOp :
833  Vector_Op<"outerproduct", [NoSideEffect,
834    PredOpTrait<"lhs operand and result have same element type",
835                TCresVTEtIsSameAsOpBase<0, 0>>,
836    PredOpTrait<"rhs operand and result have same element type",
837                TCresVTEtIsSameAsOpBase<0, 1>>]>,
838    Arguments<(ins AnyVector:$lhs, AnyType:$rhs,
839               Variadic<AnyVector>:$acc,
840               DefaultValuedAttr<Vector_CombiningKindAttr, "CombiningKind::ADD">:$kind)>,
841    Results<(outs AnyVector)> {
842  let summary = "vector outerproduct with optional fused add";
843  let description = [{
844    Takes 2 1-D vectors and returns the 2-D vector containing the outer-product,
845    as illustrated below:
846    ```
847     outer |   [c, d]
848     ------+------------
849       [a, | [ [a*c, a*d],
850        b] |   [b*c, b*d] ]
851    ```
852    This operation also accepts a 1-D vector lhs and a scalar rhs. In this
853    case a simple AXPY operation is performed, which returns a 1-D vector.
854    ```
855        [a, b] * c = [a*c, b*c]
856    ```
857
858    An optional extra vector argument with the same shape as the output
859    vector may be specified in which case the operation returns the sum of
860    the outer-product and the extra vector. In this multiply-accumulate
861    scenario for floating-point arguments, the rounding mode is enforced
862    by guaranteeing that a fused-multiply add operation is emitted. When
863    lowered to the LLVMIR dialect, this form emits `llvm.intr.fma`, which
864    is guaranteed to lower to actual `fma` instructions on x86.
865
866    An optional kind attribute may be specified to be add/mul/min/max
867    for int/fp, and and/or/xor for int only. The default is "add", in which
868    case the operation returns a fused multiply-add. In other cases it returns
869    a multiply followed by the appropriate operation (for example, a compare and
870    select for "max").
871
872    Example:
873
874    ```
875    %2 = vector.outerproduct %0, %1: vector<4xf32>, vector<8xf32>
876    return %2: vector<4x8xf32>
877
878    %3 = vector.outerproduct %0, %1, %2:
879      vector<4xf32>, vector<8xf32>, vector<4x8xf32>
880    return %3: vector<4x8xf32>
881
882    %4 = vector.outerproduct %0, %1, %2 {kind = #vector.kind<max>}:
883      vector<4xf32>, vector<8xf32>, vector<4x8xf32>
884    return %3: vector<4x8xf32>
885
886    %6 = vector.outerproduct %4, %5: vector<10xf32>, f32
887    return %6: vector<10xf32>
888
889    ```
890  }];
891  let builders = [
892    // Build an op without mask, use the type of `acc` as the return type.
893    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$acc)>
894  ];
895  let extraClassDeclaration = [{
896    VectorType getOperandVectorTypeLHS() {
897      return lhs().getType().cast<VectorType>();
898    }
899    Type getOperandTypeRHS() {
900      return rhs().getType();
901    }
902    VectorType getOperandVectorTypeACC() {
903      return (llvm::size(acc()) == 0)
904        ? VectorType()
905        : (*acc().begin()).getType().cast<VectorType>();
906    }
907    VectorType getVectorType() {
908      return getResult().getType().cast<VectorType>();
909    }
910    static constexpr StringRef getKindAttrName() {
911      return "kind";
912    }
913    static CombiningKind getDefaultKind() {
914      return CombiningKind::ADD;
915    }
916  }];
917}
918
919// TODO: Add transformation which decomposes ReshapeOp into an optimized
920// sequence of vector rotate/shuffle/select operations.
921def Vector_ReshapeOp :
922  Vector_Op<"reshape", [AttrSizedOperandSegments, NoSideEffect]>,
923    Arguments<(ins AnyVector:$vector, Variadic<Index>:$input_shape,
924               Variadic<Index>:$output_shape,
925               I64ArrayAttr:$fixed_vector_sizes)>,
926    Results<(outs AnyVector:$result)> {
927  let summary = "vector reshape operation";
928  let description = [{
929    Reshapes its vector operand from 'input_shape' to 'output_shape' maintaining
930    fixed vector dimension 'fixed_vector_sizes' on the innermost vector
931    dimensions.
932
933    The parameters 'input_shape' and 'output_shape' represent valid data shapes
934    across fixed vector shapes. For example, if a vector has a valid data
935    shape [6] with fixed vector size [8], then the valid data elements are
936    assumed to be stored at the beginning of the vector with the remaining
937    vector elements undefined.
938
939    In the examples below, valid data elements are represented by an alphabetic
940    character, and undefined data elements are represented by '-'.
941
942    Example
943
944      vector<1x8xf32> with valid data shape [6], fixed vector sizes [8]
945
946                input: [a, b, c, d, e, f]
947
948           layout map: (d0) -> (d0 floordiv 8, d0 mod 8)
949
950        vector layout: [a, b, c, d, e, f, -, -]
951
952    Example
953
954      vector<2x8xf32> with valid data shape [10], fixed vector sizes [8]
955
956                input: [a, b, c, d, e, f, g, h, i, j]
957
958           layout map: (d0) -> (d0 floordiv 8, d0 mod 8)
959
960        vector layout: [[a, b, c, d, e, f, g, h],
961                        [i, j, -, -, -, -, -, -]]
962
963    Example
964
965      vector<2x2x2x3xf32> with valid data shape [3, 5], fixed vector sizes
966      [2, 3]
967
968                input: [[a, b, c, d, e],
969                        [f, g, h, i, j],
970                        [k, l, m, n, o]]
971
972           layout map: (d0, d1) -> (d0 floordiv 3, d1 floordiv 5,
973                                    d0 mod 3, d1 mod 5)
974
975        vector layout: [[[[a, b, c],
976                          [f, g, h]]
977                         [[d, e, -],
978                          [i, j, -]]],
979                        [[[k, l, m],
980                          [-, -, -]]
981                         [[n, o, -],
982                          [-, -, -]]]]
983
984    Example
985
986      %1 = vector.reshape %0, [%c3, %c6], [%c2, %c9], [4]
987        : vector<3x2x4xf32> to vector<2x3x4xf32>
988
989             input: [[a, b, c, d, e, f],
990                     [g, h, i, j, k, l],
991                     [m, n, o, p, q, r]]
992
993        layout map: (d0, d1) -> (d0, d1 floordiv 4, d1 mod 4)
994
995
996      Input vector:  [[[a, b, c, d],
997                       [e, f, -, -]],
998                      [[g, h, i, j],
999                       [k, l, -, -]],
1000                      [[m, n, o, p],
1001                       [q, r, -, -]]]
1002
1003      Output vector:  [[[a, b, c, d],
1004                        [e, f, g, h],
1005                        [i, -, -, -]],
1006                       [[j, k, l, m],
1007                        [n, o, p, q],
1008                        [r, -, -, -]]]
1009  }];
1010
1011  let extraClassDeclaration = [{
1012    VectorType getInputVectorType() {
1013      return vector().getType().cast<VectorType>();
1014    }
1015    VectorType getOutputVectorType() {
1016      return getResult().getType().cast<VectorType>();
1017    }
1018
1019    /// Returns as integer value the number of input shape operands.
1020    int64_t getNumInputShapeSizes() { return input_shape().size(); }
1021
1022    /// Returns as integer value the number of output shape operands.
1023    int64_t getNumOutputShapeSizes() { return output_shape().size(); }
1024
1025    void getFixedVectorSizes(SmallVectorImpl<int64_t> &results);
1026
1027    static StringRef getFixedVectorSizesAttrName() {
1028      return "fixed_vector_sizes";
1029    }
1030    static StringRef getInputShapeAttrName() { return "input_shape"; }
1031    static StringRef getOutputShapeAttrName() { return "output_shape"; }
1032  }];
1033
1034  let assemblyFormat = [{
1035    $vector `,` `[` $input_shape `]` `,` `[` $output_shape `]` `,`
1036    $fixed_vector_sizes attr-dict `:` type($vector) `to` type($result)
1037  }];
1038}
1039
1040def Vector_ExtractStridedSliceOp :
1041  Vector_Op<"extract_strided_slice", [NoSideEffect,
1042    PredOpTrait<"operand and result have same element type",
1043                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
1044    Arguments<(ins AnyVector:$vector, I64ArrayAttr:$offsets,
1045               I64ArrayAttr:$sizes, I64ArrayAttr:$strides)>,
1046    Results<(outs AnyVector)> {
1047  let summary = "extract_strided_slice operation";
1048  let description = [{
1049    Takes an n-D vector, k-D `offsets` integer array attribute, a k-sized
1050    `sizes` integer array attribute, a k-sized `strides` integer array
1051    attribute and extracts the n-D subvector at the proper offset.
1052
1053    At the moment strides must contain only 1s.
1054    // TODO: support non-1 strides.
1055
1056    Returns an n-D vector where the first k-D dimensions match the `sizes`
1057    attribute. The returned subvector contains the elements starting at offset
1058    `offsets` and ending at `offsets + sizes`.
1059
1060    Example:
1061
1062    ```mlir
1063    %1 = vector.extract_strided_slice %0
1064        {offsets = [0, 2], sizes = [2, 4], strides = [1, 1]}:
1065      vector<4x8x16xf32> to vector<2x4x16xf32>
1066
1067    // TODO: Evolve to a range form syntax similar to:
1068    %1 = vector.extract_strided_slice %0[0:2:1][2:4:1]
1069      vector<4x8x16xf32> to vector<2x4x16xf32>
1070    ```
1071  }];
1072  let builders = [
1073    OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$offsets,
1074      "ArrayRef<int64_t>":$sizes, "ArrayRef<int64_t>":$strides)>
1075  ];
1076  let extraClassDeclaration = [{
1077    static StringRef getOffsetsAttrName() { return "offsets"; }
1078    static StringRef getSizesAttrName() { return "sizes"; }
1079    static StringRef getStridesAttrName() { return "strides"; }
1080    VectorType getVectorType(){ return vector().getType().cast<VectorType>(); }
1081    void getOffsets(SmallVectorImpl<int64_t> &results);
1082  }];
1083  let hasCanonicalizer = 1;
1084  let hasFolder = 1;
1085  let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type(results)";
1086}
1087
1088def Vector_TransferReadOp :
1089  Vector_Op<"transfer_read", [
1090      DeclareOpInterfaceMethods<VectorTransferOpInterface>,
1091      DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>,
1092      DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
1093      AttrSizedOperandSegments
1094    ]>,
1095    Arguments<(ins AnyShaped:$source, Variadic<Index>:$indices,
1096               AffineMapAttr:$permutation_map, AnyType:$padding,
1097               Optional<VectorOf<[I1]>>:$mask,
1098               OptionalAttr<BoolArrayAttr>:$in_bounds)>,
1099    Results<(outs AnyVector:$vector)> {
1100
1101  let summary = "Reads a supervector from memory into an SSA vector value.";
1102
1103  let description = [{
1104    The `vector.transfer_read` op performs a read from a slice within a
1105    [MemRef](../LangRef.md#memref-type) or a Ranked
1106    [Tensor](../LangRef.md#tensor-type) supplied as its first operand into a
1107    [vector](../LangRef.md#vector-type) of the same base elemental type.
1108
1109    A memref/tensor operand with vector element type, must have its vector
1110    element type match a suffix (shape and element type) of the vector (e.g.
1111    memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>).
1112
1113    The slice is further defined by a full-rank index within the MemRef/Tensor,
1114    supplied as the operands `2 .. 1 + rank(memref/tensor)`.
1115
1116    The permutation_map [attribute](../LangRef.md#attributes) is an
1117    [affine-map](Affine.md#affine-maps) which specifies the transposition on the
1118    slice to match the vector shape. The permutation map may be implicit and
1119    omitted from parsing and printing if it is the canonical minor identity map
1120    (i.e. if it does not permute or broadcast any dimension).
1121
1122    The size of the slice is specified by the size of the vector, given as the
1123    return type.
1124
1125    An SSA value `padding` of the same elemental type as the MemRef/Tensor is
1126    provided to specify a fallback value in the case of out-of-bounds accesses
1127    and/or masking.
1128
1129    An optional SSA value `mask` of the same shape as the vector type may be
1130    specified to mask out elements. Such elements will be replaces with
1131    `padding`. Elements whose corresponding mask element is `0` are masked out.
1132
1133    An optional boolean array attribute is provided to specify which dimensions
1134    of the transfer are guaranteed to be within bounds. The length of the array
1135    must equal the rank of the vector type. Broadcast dimensions must always be
1136    in-bounds. The absence of this optional `in_bounds` attribute signifies that
1137    any dimension of the transfer (except for broadcasts) may be out-of-bounds.
1138    A `vector.transfer_read` can be lowered to a simple load if all dimensions
1139    are specified to be within bounds and no `mask` was specified.
1140
1141    Note that `in_bounds` is specified for result dimensions and not input
1142    dimensions. The starting point of the transfer, i.e.,
1143    `%A[%expr1, %expr2, %expr3, %expr4]` in the example below, is expected to
1144    be in-bounds and as indices are increasing, accesses may run out-of-bounds.
1145
1146    This operation is called 'read' by opposition to 'load' because the
1147    super-vector granularity is generally not representable with a single
1148    hardware register. A `vector.transfer_read` is thus a mid-level abstraction
1149    that supports super-vectorization with non-effecting padding for full-tile
1150    only operations.
1151
1152    More precisely, let's dive deeper into the permutation_map for the following
1153    MLIR:
1154
1155    ```mlir
1156    vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4]
1157      { permutation_map : (d0,d1,d2,d3) -> (d2,0,d0) } :
1158      memref<?x?x?x?xf32>, vector<3x4x5xf32>
1159    ```
1160
1161    This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3,
1162    %expr4]`. The size of the slice is 3 along d2 and 5 along d0, so the slice
1163    is: `%A[%expr1 : %expr1 + 5, %expr2, %expr3:%expr3 + 3, %expr4]`
1164
1165    That slice needs to be read into a `vector<3x4x5xf32>`. Since the
1166    permutation map is not full rank, there must be a broadcast along vector
1167    dimension `1`.
1168
1169    A notional lowering of vector.transfer_read could generate code resembling:
1170
1171    ```mlir
1172    // %expr1, %expr2, %expr3, %expr4 defined before this point
1173    %tmp = alloc() : vector<3x4x5xf32>
1174    %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
1175    for %i = 0 to 3 {
1176      affine.for %j = 0 to 4 {
1177        affine.for %k = 0 to 5 {
1178          %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
1179            memref<?x?x?x?xf32>
1180          store %tmp[%i, %j, %k] : vector<3x4x5xf32>
1181    }}}
1182    %c0 = constant 0 : index
1183    %vec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
1184    ```
1185
1186    On a GPU one could then map `i`, `j`, `k` to blocks and threads. Notice that
1187    the temporary storage footprint is `3 * 5` values but `3 * 4 * 5` values are
1188    actually transferred between `%A` and `%tmp`.
1189
1190    Alternatively, if a notional vector broadcast operation were available, the
1191    lowered code would resemble:
1192
1193    ```mlir
1194    // %expr1, %expr2, %expr3, %expr4 defined before this point
1195    %tmp = alloc() : vector<3x4x5xf32>
1196    %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
1197    for %i = 0 to 3 {
1198      affine.for %k = 0 to 5 {
1199        %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
1200          memref<?x?x?x?xf32>
1201        store %tmp[%i, 0, %k] : vector<3x4x5xf32>
1202    }}
1203    %c0 = constant 0 : index
1204    %tmpvec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
1205    %vec = broadcast %tmpvec, 1 : vector<3x4x5xf32>
1206    ```
1207
1208    where `broadcast` broadcasts from element 0 to all others along the
1209    specified dimension. This time, the temporary storage footprint is `3 * 5`
1210    values which is the same amount of data as the `3 * 5` values transferred.
1211    An additional `1` broadcast is required. On a GPU this broadcast could be
1212    implemented using a warp-shuffle if loop `j` were mapped to `threadIdx.x`.
1213
1214    Syntax
1215    ```
1216    operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list
1217      `{` attribute-entry `} :` memref-type `,` vector-type
1218    ```
1219
1220    Example:
1221
1222    ```mlir
1223    // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32>
1224    // and pad with %f0 to handle the boundary case:
1225    %f0 = constant 0.0f : f32
1226    for %i0 = 0 to %0 {
1227      affine.for %i1 = 0 to %1 step 256 {
1228        affine.for %i2 = 0 to %2 step 32 {
1229          %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0)
1230               {permutation_map: (d0, d1, d2) -> (d2, d1)} :
1231               memref<?x?x?xf32>, vector<32x256xf32>
1232    }}}
1233
1234    // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
1235    // vector<128xf32>. The underlying implementation will require a 1-D vector
1236    // broadcast:
1237    for %i0 = 0 to %0 {
1238      affine.for %i1 = 0 to %1 {
1239        %3 = vector.transfer_read %A[%i0, %i1]
1240             {permutation_map: (d0, d1) -> (0)} :
1241             memref<?x?xf32>, vector<128xf32>
1242      }
1243    }
1244
1245    // Read from a memref with vector element type.
1246    %4 = vector.transfer_read %arg1[%c3, %c3], %vf0
1247      {permutation_map = (d0, d1)->(d0, d1)}
1248        : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
1249
1250    // Read from a tensor with vector element type.
1251    %4 = vector.transfer_read %arg1[%c3, %c3], %vf0
1252      {permutation_map = (d0, d1)->(d0, d1)}
1253        : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
1254    ```
1255  }];
1256
1257  let builders = [
1258    // Builder that sets padding to zero.
1259    OpBuilder<(ins "VectorType":$vector, "Value":$source,
1260      "ValueRange":$indices, "AffineMap":$permutationMap,
1261      CArg<"ArrayRef<bool>", "{}">:$inBounds)>,
1262    // Builder that sets padding to 'getMinorIdentityMap'.
1263    OpBuilder<(ins "VectorType":$vector, "Value":$source,
1264      "ValueRange":$indices, "Value":$padding,
1265      CArg<"ArrayRef<bool>", "{}">:$inBounds)>,
1266    // Builder that sets permutation map (resp. padding) to
1267    // 'getMinorIdentityMap' (resp. zero).
1268    OpBuilder<(ins "VectorType":$vector, "Value":$source,
1269      "ValueRange":$indices, CArg<"ArrayRef<bool>", "{}">:$inBounds)>,
1270    // Builder that does not set mask.
1271    OpBuilder<(ins "Type":$vector, "Value":$source,
1272      "ValueRange":$indices, "AffineMapAttr":$permutationMap, "Value":$padding,
1273      "ArrayAttr":$inBounds)>,
1274    // Builder that does not set mask.
1275    OpBuilder<(ins "Type":$vector, "Value":$source,
1276      "ValueRange":$indices, "AffineMap":$permutationMap, "Value":$padding,
1277      "ArrayAttr":$inBounds)>
1278  ];
1279
1280  let hasFolder = 1;
1281}
1282
1283def Vector_TransferWriteOp :
1284  Vector_Op<"transfer_write", [
1285      DeclareOpInterfaceMethods<VectorTransferOpInterface>,
1286      DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>,
1287      DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
1288      AttrSizedOperandSegments
1289  ]>,
1290    Arguments<(ins AnyVector:$vector, AnyShaped:$source,
1291               Variadic<Index>:$indices,
1292               AffineMapAttr:$permutation_map,
1293               Optional<VectorOf<[I1]>>:$mask,
1294               OptionalAttr<BoolArrayAttr>:$in_bounds)>,
1295    Results<(outs Optional<AnyRankedTensor>:$result)> {
1296
1297  let summary = "The vector.transfer_write op writes a supervector to memory.";
1298
1299  let description = [{
1300    The `vector.transfer_write` op performs a write from a
1301    [vector](../LangRef.md#vector-type), supplied as its first operand, into a
1302    slice within a [MemRef](../LangRef.md#memref-type) or a Ranked
1303    [Tensor](../LangRef.md#tensor-type) of the same base elemental type,
1304    supplied as its second operand.
1305
1306    A vector memref/tensor operand must have its vector element type match a
1307    suffix (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>,
1308    vector<1x1x4x3xf32>). If the operand is a tensor, the operation returns a
1309    new tensor of the same type.
1310
1311    The slice is further defined by a full-rank index within the MemRef/Tensor,
1312    supplied as the operands `3 .. 2 + rank(memref/tensor)`.
1313
1314    The permutation_map [attribute](../LangRef.md#attributes) is an
1315    [affine-map](Affine.md#affine-maps) which specifies the transposition on the
1316    slice to match the vector shape. The permutation map may be implicit and
1317    omitted from parsing and printing if it is the canonical minor identity map
1318    (i.e. if it does not permute any dimension). In contrast to `transfer_read`,
1319    write ops cannot have broadcast dimensions.
1320
1321    The size of the slice is specified by the size of the vector.
1322
1323    An optional SSA value `mask` of the same shape as the vector type may be
1324    specified to mask out elements. Elements whose corresponding mask element
1325    is `0` are masked out.
1326
1327    An optional boolean array attribute is provided to specify which dimensions
1328    of the transfer are guaranteed to be within bounds. The absence of this
1329    `in_bounds` attribute signifies that any dimension of the transfer may be
1330    out-of-bounds. A `vector.transfer_write` can be lowered to a simple store
1331    if all dimensions are specified to be within bounds and no `mask` was
1332    specified.
1333
1334    An optional boolean array attribute is provided to specify which dimensions
1335    of the transfer are guaranteed to be within bounds. The length of the array
1336    must equal the rank of the vector type. The absence of this optional
1337    `in_bounds` attribute signifies that any dimension of the transfer
1338    may be out-of-bounds. A `vector.transfer_write` can be lowered to a simple
1339    store if all dimensions are specified to be within bounds and no `mask` was
1340    specified.
1341
1342    Note that `in_bounds` is specified for result dimensions and not input
1343    dimensions. The starting point of the transfer, i.e.,
1344    `%A[%expr1, %expr2, %expr3, %expr4]` in the example below, is expected to
1345    be in-bounds and as indices are increasing, accesses may run out-of-bounds.
1346
1347    This operation is called 'write' by opposition to 'store' because the
1348    super-vector granularity is generally not representable with a single
1349    hardware register. A `vector.transfer_write` is thus a
1350    mid-level abstraction that supports super-vectorization with non-effecting
1351    padding for full-tile-only code. It is the responsibility of
1352    `vector.transfer_write`'s implementation to ensure the memory writes are
1353    valid. Different lowerings may be pertinent depending on the hardware
1354    support.
1355
1356    Example:
1357
1358    ```mlir
1359    // write vector<16x32x64xf32> into the slice
1360    //   `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
1361    for %i0 = 0 to %0 {
1362      affine.for %i1 = 0 to %1 step 32 {
1363        affine.for %i2 = 0 to %2 step 64 {
1364          affine.for %i3 = 0 to %3 step 16 {
1365            %val = `ssa-value` : vector<16x32x64xf32>
1366            vector.transfer_write %val, %A[%i0, %i1, %i2, %i3]
1367              {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :
1368              vector<16x32x64xf32>, memref<?x?x?x?xf32>
1369    }}}}
1370
1371    // write to a memref with vector element type.
1372    vector.transfer_write %4, %arg1[%c3, %c3]
1373      {permutation_map = (d0, d1)->(d0, d1)}
1374        : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
1375
1376    // return a tensor where the vector is inserted into the source tensor.
1377    %5 = vector.transfer_write %4, %arg1[%c3, %c3]
1378      {permutation_map = (d0, d1)->(d0, d1)}
1379        : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
1380    ```
1381  }];
1382
1383  let builders = [
1384    // Builder that sets permutation map to 'getMinorIdentityMap'.
1385    OpBuilder<(ins "Value":$vector, "Value":$source, "ValueRange":$indices,
1386      CArg<"ArrayRef<bool>", "{}">:$inBounds)>,
1387    OpBuilder<(ins "Value":$vector, "Value":$source, "ValueRange":$indices,
1388      "AffineMap":$permutationMap)>,
1389    OpBuilder<(ins "Value":$vector, "Value":$source, "ValueRange":$indices,
1390      "AffineMapAttr":$permutationMap, "ArrayAttr":$inBounds)>,
1391    OpBuilder<(ins "Value":$vector, "Value":$source, "ValueRange":$indices,
1392      "AffineMap":$permutationMap, "Value":$mask, "ArrayAttr":$inBounds)>,
1393    OpBuilder<(ins "Value":$vector, "Value":$source, "ValueRange":$indices,
1394      "AffineMap":$permutationMap, "ArrayAttr":$inBounds)>,
1395  ];
1396
1397  let hasFolder = 1;
1398  let hasCanonicalizer = 1;
1399}
1400
1401def Vector_LoadOp : Vector_Op<"load"> {
1402  let summary = "reads an n-D slice of memory into an n-D vector";
1403  let description = [{
1404    The 'vector.load' operation reads an n-D slice of memory into an n-D
1405    vector. It takes a 'base' memref, an index for each memref dimension and a
1406    result vector type as arguments. It returns a value of the result vector
1407    type. The 'base' memref and indices determine the start memory address from
1408    which to read. Each index provides an offset for each memref dimension
1409    based on the element type of the memref. The shape of the result vector
1410    type determines the shape of the slice read from the start memory address.
1411    The elements along each dimension of the slice are strided by the memref
1412    strides. Only unit strides are allowed along the most minor memref
1413    dimension. These constraints guarantee that elements read along the first
1414    dimension of the slice are contiguous in memory.
1415
1416    The memref element type can be a scalar or a vector type. If the memref
1417    element type is a scalar, it should match the element type of the result
1418    vector. If the memref element type is vector, it should match the result
1419    vector type.
1420
1421    Example 1: 1-D vector load on a scalar memref.
1422    ```mlir
1423    %result = vector.load %base[%i, %j] : memref<100x100xf32>, vector<8xf32>
1424    ```
1425
1426    Example 2: 1-D vector load on a vector memref.
1427    ```mlir
1428    %result = vector.load %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32>
1429    ```
1430
1431    Example 3:  2-D vector load on a scalar memref.
1432    ```mlir
1433    %result = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<4x8xf32>
1434    ```
1435
1436    Example 4:  2-D vector load on a vector memref.
1437    ```mlir
1438    %result = vector.load %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32>
1439    ```
1440
1441    Representation-wise, the 'vector.load' operation permits out-of-bounds
1442    reads. Support and implementation of out-of-bounds vector loads is
1443    target-specific. No assumptions should be made on the value of elements
1444    loaded out of bounds. Not all targets may support out-of-bounds vector
1445    loads.
1446
1447    Example 5:  Potential out-of-bound vector load.
1448    ```mlir
1449    %result = vector.load %memref[%index] : memref<?xf32>, vector<8xf32>
1450    ```
1451
1452    Example 6:  Explicit out-of-bound vector load.
1453    ```mlir
1454    %result = vector.load %memref[%c0] : memref<7xf32>, vector<8xf32>
1455    ```
1456  }];
1457
1458  let arguments = (ins Arg<AnyMemRef, "the reference to load from",
1459      [MemRead]>:$base,
1460      Variadic<Index>:$indices);
1461  let results = (outs AnyVector:$result);
1462
1463  let extraClassDeclaration = [{
1464    MemRefType getMemRefType() {
1465      return base().getType().cast<MemRefType>();
1466    }
1467
1468    VectorType getVectorType() {
1469      return result().getType().cast<VectorType>();
1470    }
1471  }];
1472
1473  let hasFolder = 1;
1474
1475  let assemblyFormat =
1476      "$base `[` $indices `]` attr-dict `:` type($base) `,` type($result)";
1477}
1478
1479def Vector_StoreOp : Vector_Op<"store"> {
1480  let summary = "writes an n-D vector to an n-D slice of memory";
1481  let description = [{
1482    The 'vector.store' operation writes an n-D vector to an n-D slice of memory.
1483    It takes the vector value to be stored, a 'base' memref and an index for
1484    each memref dimension. The 'base' memref and indices determine the start
1485    memory address from which to write. Each index provides an offset for each
1486    memref dimension based on the element type of the memref. The shape of the
1487    vector value to store determines the shape of the slice written from the
1488    start memory address. The elements along each dimension of the slice are
1489    strided by the memref strides. Only unit strides are allowed along the most
1490    minor memref dimension. These constraints guarantee that elements written
1491    along the first dimension of the slice are contiguous in memory.
1492
1493    The memref element type can be a scalar or a vector type. If the memref
1494    element type is a scalar, it should match the element type of the value
1495    to store. If the memref element type is vector, it should match the type
1496    of the value to store.
1497
1498    Example 1: 1-D vector store on a scalar memref.
1499    ```mlir
1500    vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<8xf32>
1501    ```
1502
1503    Example 2: 1-D vector store on a vector memref.
1504    ```mlir
1505    vector.store %valueToStore, %memref[%i, %j] : memref<200x100xvector<8xf32>>, vector<8xf32>
1506    ```
1507
1508    Example 3:  2-D vector store on a scalar memref.
1509    ```mlir
1510    vector.store %valueToStore, %memref[%i, %j] : memref<200x100xf32>, vector<4x8xf32>
1511    ```
1512
1513    Example 4:  2-D vector store on a vector memref.
1514    ```mlir
1515    vector.store %valueToStore, %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32>
1516    ```
1517
1518    Representation-wise, the 'vector.store' operation permits out-of-bounds
1519    writes. Support and implementation of out-of-bounds vector stores are
1520    target-specific. No assumptions should be made on the memory written out of
1521    bounds. Not all targets may support out-of-bounds vector stores.
1522
1523    Example 5:  Potential out-of-bounds vector store.
1524    ```mlir
1525    vector.store %valueToStore, %memref[%index] : memref<?xf32>, vector<8xf32>
1526    ```
1527
1528    Example 6:  Explicit out-of-bounds vector store.
1529    ```mlir
1530    vector.store %valueToStore, %memref[%c0] : memref<7xf32>, vector<8xf32>
1531    ```
1532  }];
1533
1534  let arguments = (ins AnyVector:$valueToStore,
1535      Arg<AnyMemRef, "the reference to store to",
1536      [MemWrite]>:$base,
1537      Variadic<Index>:$indices);
1538
1539  let extraClassDeclaration = [{
1540    MemRefType getMemRefType() {
1541      return base().getType().cast<MemRefType>();
1542    }
1543
1544    VectorType getVectorType() {
1545      return valueToStore().getType().cast<VectorType>();
1546    }
1547  }];
1548
1549  let hasFolder = 1;
1550
1551  let assemblyFormat = "$valueToStore `,` $base `[` $indices `]` attr-dict "
1552                       "`:` type($base) `,` type($valueToStore)";
1553}
1554
1555def Vector_MaskedLoadOp :
1556  Vector_Op<"maskedload">,
1557    Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base,
1558               Variadic<Index>:$indices,
1559               VectorOfRankAndType<[1], [I1]>:$mask,
1560               VectorOfRank<[1]>:$pass_thru)>,
1561    Results<(outs VectorOfRank<[1]>:$result)> {
1562
1563  let summary = "loads elements from memory into a vector as defined by a mask vector";
1564
1565  let description = [{
1566    The masked load reads elements from memory into a 1-D vector as defined
1567    by a base with indices and a 1-D mask vector. When the mask is set, the
1568    element is read from memory. Otherwise, the corresponding element is taken
1569    from a 1-D pass-through vector. Informally the semantics are:
1570    ```
1571    result[0] := mask[0] ? base[i+0] : pass_thru[0]
1572    result[1] := mask[1] ? base[i+1] : pass_thru[1]
1573    etc.
1574    ```
1575    The masked load can be used directly where applicable, or can be used
1576    during progressively lowering to bring other memory operations closer to
1577    hardware ISA support for a masked load. The semantics of the operation
1578    closely correspond to those of the `llvm.masked.load`
1579    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-load-intrinsics).
1580
1581    Examples:
1582
1583    ```mlir
1584    %0 = vector.maskedload %base[%i], %mask, %pass_thru
1585       : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32>
1586
1587    %1 = vector.maskedload %base[%i, %j], %mask, %pass_thru
1588       : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1589    ```
1590  }];
1591  let extraClassDeclaration = [{
1592    MemRefType getMemRefType() {
1593      return base().getType().cast<MemRefType>();
1594    }
1595    VectorType getMaskVectorType() {
1596      return mask().getType().cast<VectorType>();
1597    }
1598    VectorType getPassThruVectorType() {
1599      return pass_thru().getType().cast<VectorType>();
1600    }
1601    VectorType getVectorType() {
1602      return result().getType().cast<VectorType>();
1603    }
1604  }];
1605  let assemblyFormat = "$base `[` $indices `]` `,` $mask `,` $pass_thru attr-dict `:` "
1606    "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)";
1607  let hasCanonicalizer = 1;
1608  let hasFolder = 1;
1609}
1610
1611def Vector_MaskedStoreOp :
1612  Vector_Op<"maskedstore">,
1613    Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
1614               Variadic<Index>:$indices,
1615               VectorOfRankAndType<[1], [I1]>:$mask,
1616               VectorOfRank<[1]>:$valueToStore)> {
1617
1618  let summary = "stores elements from a vector into memory as defined by a mask vector";
1619
1620  let description = [{
1621    The masked store operation writes elements from a 1-D vector into memory
1622    as defined by a base with indices and a 1-D mask vector. When the mask is
1623    set, the corresponding element from the vector is written to memory. Otherwise,
1624    no action is taken for the element. Informally the semantics are:
1625    ```
1626    if (mask[0]) base[i+0] = value[0]
1627    if (mask[1]) base[i+1] = value[1]
1628    etc.
1629    ```
1630    The masked store can be used directly where applicable, or can be used
1631    during progressively lowering to bring other memory operations closer to
1632    hardware ISA support for a masked store. The semantics of the operation
1633    closely correspond to those of the `llvm.masked.store`
1634    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-store-intrinsics).
1635
1636    Examples:
1637
1638    ```mlir
1639    vector.maskedstore %base[%i], %mask, %value
1640      : memref<?xf32>, vector<8xi1>, vector<8xf32>
1641
1642    vector.maskedstore %base[%i, %j], %mask, %value
1643      : memref<?x?xf32>, vector<16xi1>, vector<16xf32>
1644    ```
1645  }];
1646  let extraClassDeclaration = [{
1647    MemRefType getMemRefType() {
1648      return base().getType().cast<MemRefType>();
1649    }
1650    VectorType getMaskVectorType() {
1651      return mask().getType().cast<VectorType>();
1652    }
1653    VectorType getVectorType() {
1654      return valueToStore().getType().cast<VectorType>();
1655    }
1656  }];
1657  let assemblyFormat =
1658      "$base `[` $indices `]` `,` $mask `,` $valueToStore "
1659      "attr-dict `:` type($base) `,` type($mask) `,` type($valueToStore)";
1660  let hasCanonicalizer = 1;
1661  let hasFolder = 1;
1662}
1663
1664def Vector_GatherOp :
1665  Vector_Op<"gather">,
1666    Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base,
1667               Variadic<Index>:$indices,
1668	       VectorOfRankAndType<[1], [AnyInteger, Index]>:$index_vec,
1669               VectorOfRankAndType<[1], [I1]>:$mask,
1670               VectorOfRank<[1]>:$pass_thru)>,
1671    Results<(outs VectorOfRank<[1]>:$result)> {
1672
1673  let summary = "gathers elements from memory into a vector as defined by an index vector and mask";
1674
1675  let description = [{
1676    The gather operation gathers elements from memory into a 1-D vector as
1677    defined by a base with indices and an additional 1-D index vector, but
1678    only if the corresponding bit is set in a 1-D mask vector. Otherwise, the
1679    element is taken from a 1-D pass-through vector. Informally the semantics
1680    are:
1681    ```
1682    result[0] := mask[0] ? base[index[0]] : pass_thru[0]
1683    result[1] := mask[1] ? base[index[1]] : pass_thru[1]
1684    etc.
1685    ```
1686    The vector dialect leaves out-of-bounds behavior undefined.
1687
1688    The gather operation can be used directly where applicable, or can be used
1689    during progressively lowering to bring other memory operations closer to
1690    hardware ISA support for a gather. The semantics of the operation closely
1691    correspond to those of the `llvm.masked.gather`
1692    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-gather-intrinsics).
1693
1694    Examples:
1695
1696    ```mlir
1697    %0 = vector.gather %base[%c0][%v], %mask, %pass_thru
1698       : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1699
1700    %1 = vector.gather %base[%i, %j][%v], %mask, %pass_thru
1701       : memref<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1702    ```
1703  }];
1704  let extraClassDeclaration = [{
1705    MemRefType getMemRefType() {
1706      return base().getType().cast<MemRefType>();
1707    }
1708    VectorType getIndexVectorType() {
1709      return index_vec().getType().cast<VectorType>();
1710    }
1711    VectorType getMaskVectorType() {
1712      return mask().getType().cast<VectorType>();
1713    }
1714    VectorType getPassThruVectorType() {
1715      return pass_thru().getType().cast<VectorType>();
1716    }
1717    VectorType getVectorType() {
1718      return result().getType().cast<VectorType>();
1719    }
1720  }];
1721  let assemblyFormat =
1722    "$base `[` $indices `]` `[` $index_vec `]` `,` "
1723    "$mask `,` $pass_thru attr-dict `:` type($base) `,` "
1724    "type($index_vec)  `,` type($mask) `,` type($pass_thru) "
1725    "`into` type($result)";
1726  let hasCanonicalizer = 1;
1727}
1728
1729def Vector_ScatterOp :
1730  Vector_Op<"scatter">,
1731    Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
1732               Variadic<Index>:$indices,
1733	       VectorOfRankAndType<[1], [AnyInteger, Index]>:$index_vec,
1734               VectorOfRankAndType<[1], [I1]>:$mask,
1735               VectorOfRank<[1]>:$valueToStore)> {
1736
1737  let summary = "scatters elements from a vector into memory as defined by an index vector and mask";
1738
1739  let description = [{
1740    The scatter operation scatters elements from a 1-D vector into memory as
1741    defined by a base with indices and an additional 1-D index vector, but
1742    only if the corresponding bit in a 1-D mask vector is set. Otherwise, no
1743    action is taken for that element. Informally the semantics are:
1744    ```
1745    if (mask[0]) base[index[0]] = value[0]
1746    if (mask[1]) base[index[1]] = value[1]
1747    etc.
1748    ```
1749    The vector dialect leaves out-of-bounds and repeated index behavior
1750    undefined. Underlying implementations may enforce strict sequential
1751    semantics for the latter, though.
1752    TODO: enforce the latter always?
1753
1754    The scatter operation can be used directly where applicable, or can be used
1755    during progressively lowering to bring other memory operations closer to
1756    hardware ISA support for a scatter. The semantics of the operation closely
1757    correspond to those of the `llvm.masked.scatter`
1758    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-scatter-intrinsics).
1759
1760    Examples:
1761
1762    ```mlir
1763    vector.scatter %base[%c0][%v], %mask, %value
1764        : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
1765
1766    vector.scatter %base[%i, %j][%v], %mask, %value
1767        : memref<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
1768    ```
1769  }];
1770  let extraClassDeclaration = [{
1771    MemRefType getMemRefType() {
1772      return base().getType().cast<MemRefType>();
1773    }
1774    VectorType getIndexVectorType() {
1775      return index_vec().getType().cast<VectorType>();
1776    }
1777    VectorType getMaskVectorType() {
1778      return mask().getType().cast<VectorType>();
1779    }
1780    VectorType getVectorType() {
1781      return valueToStore().getType().cast<VectorType>();
1782    }
1783  }];
1784  let assemblyFormat =
1785      "$base `[` $indices `]` `[` $index_vec `]` `,` "
1786      "$mask `,` $valueToStore attr-dict `:` type($base) `,` "
1787      "type($index_vec)  `,` type($mask) `,` type($valueToStore)";
1788  let hasCanonicalizer = 1;
1789}
1790
1791def Vector_ExpandLoadOp :
1792  Vector_Op<"expandload">,
1793    Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base,
1794               Variadic<Index>:$indices,
1795               VectorOfRankAndType<[1], [I1]>:$mask,
1796               VectorOfRank<[1]>:$pass_thru)>,
1797    Results<(outs VectorOfRank<[1]>:$result)> {
1798
1799  let summary = "reads elements from memory and spreads them into a vector as defined by a mask";
1800
1801  let description = [{
1802    The expand load reads elements from memory into a 1-D vector as defined
1803    by a base with indices and a 1-D mask vector. When the mask is set, the
1804    next element is read from memory. Otherwise, the corresponding element
1805    is taken from a 1-D pass-through vector. Informally the semantics are:
1806    ```
1807    index = i
1808    result[0] := mask[0] ? base[index++] : pass_thru[0]
1809    result[1] := mask[1] ? base[index++] : pass_thru[1]
1810    etc.
1811    ```
1812    Note that the index increment is done conditionally.
1813
1814    The expand load can be used directly where applicable, or can be used
1815    during progressively lowering to bring other memory operations closer to
1816    hardware ISA support for an expand. The semantics of the operation closely
1817    correspond to those of the `llvm.masked.expandload`
1818    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-expandload-intrinsics).
1819
1820    Examples:
1821
1822    ```mlir
1823    %0 = vector.expandload %base[%i], %mask, %pass_thru
1824       : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32>
1825
1826    %1 = vector.expandload %base[%i, %j], %mask, %pass_thru
1827       : memref<?x?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1828    ```
1829  }];
1830  let extraClassDeclaration = [{
1831    MemRefType getMemRefType() {
1832      return base().getType().cast<MemRefType>();
1833    }
1834    VectorType getMaskVectorType() {
1835      return mask().getType().cast<VectorType>();
1836    }
1837    VectorType getPassThruVectorType() {
1838      return pass_thru().getType().cast<VectorType>();
1839    }
1840    VectorType getVectorType() {
1841      return result().getType().cast<VectorType>();
1842    }
1843  }];
1844  let assemblyFormat = "$base `[` $indices `]` `,` $mask `,` $pass_thru attr-dict `:` "
1845    "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)";
1846  let hasCanonicalizer = 1;
1847}
1848
1849def Vector_CompressStoreOp :
1850  Vector_Op<"compressstore">,
1851    Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
1852               Variadic<Index>:$indices,
1853               VectorOfRankAndType<[1], [I1]>:$mask,
1854               VectorOfRank<[1]>:$valueToStore)> {
1855
1856  let summary = "writes elements selectively from a vector as defined by a mask";
1857
1858  let description = [{
1859    The compress store operation writes elements from a 1-D vector into memory
1860    as defined by a base with indices and a 1-D mask vector. When the mask is
1861    set, the corresponding element from the vector is written next to memory.
1862    Otherwise, no action is taken for the element. Informally the semantics are:
1863    ```
1864    index = i
1865    if (mask[0]) base[index++] = value[0]
1866    if (mask[1]) base[index++] = value[1]
1867    etc.
1868    ```
1869    Note that the index increment is done conditionally.
1870
1871    The compress store can be used directly where applicable, or can be used
1872    during progressively lowering to bring other memory operations closer to
1873    hardware ISA support for a compress. The semantics of the operation closely
1874    correspond to those of the `llvm.masked.compressstore`
1875    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-compressstore-intrinsics).
1876
1877    Examples:
1878
1879    ```mlir
1880    vector.compressstore %base[%i], %mask, %value
1881      : memref<?xf32>, vector<8xi1>, vector<8xf32>
1882
1883    vector.compressstore %base[%i, %j], %mask, %value
1884      : memref<?x?xf32>, vector<16xi1>, vector<16xf32>
1885    ```
1886  }];
1887  let extraClassDeclaration = [{
1888    MemRefType getMemRefType() {
1889      return base().getType().cast<MemRefType>();
1890    }
1891    VectorType getMaskVectorType() {
1892      return mask().getType().cast<VectorType>();
1893    }
1894    VectorType getVectorType() {
1895      return valueToStore().getType().cast<VectorType>();
1896    }
1897  }];
1898  let assemblyFormat =
1899      "$base `[` $indices `]` `,` $mask `,` $valueToStore attr-dict `:` "
1900      "type($base) `,` type($mask) `,` type($valueToStore)";
1901  let hasCanonicalizer = 1;
1902}
1903
1904def Vector_ShapeCastOp :
1905  Vector_Op<"shape_cast", [NoSideEffect]>,
1906    Arguments<(ins AnyVector:$source)>,
1907    Results<(outs AnyVector:$result)> {
1908  let summary = "shape_cast casts between vector shapes";
1909  let description = [{
1910    The shape_cast operation casts between an n-D source vector shape and
1911    a k-D result vector shape (the element type remains the same).
1912
1913    If reducing rank (n > k), result dimension sizes must be a product
1914    of contiguous source dimension sizes.
1915    If expanding rank (n < k), source dimensions must factor into a
1916    contiguous sequence of destination dimension sizes.
1917    Each source dim is expanded (or contiguous sequence of source dims combined)
1918    in source dimension list order (i.e. 0 <= i < n), to produce a contiguous
1919    sequence of result dims (or a single result dim), in result dimension list
1920    order (i.e. 0 <= j < k). The product of all source dimension sizes and all
1921    result dimension sizes must match.
1922
1923    It is currently assumed that this operation does not require moving data,
1924    and that it will be folded away before lowering vector operations.
1925
1926    There is an exception to the folding expectation when targeting
1927    llvm.intr.matrix operations. We need a type conversion back and forth from a
1928    2-D MLIR vector to a 1-D flattened LLVM vector.shape_cast lowering to LLVM
1929    is supported in that particular case, for now.
1930
1931    Example:
1932
1933    ```mlir
1934    // Example casting to a lower vector rank.
1935    %1 = vector.shape_cast %0 : vector<5x1x4x3xf32> to vector<20x3xf32>
1936
1937    // Example casting to a higher vector rank.
1938    %3 = vector.shape_cast %2 : vector<10x12x8xf32> to vector<5x2x3x4x8xf32>
1939
1940    ```
1941  }];
1942  let extraClassDeclaration = [{
1943    VectorType getSourceVectorType() {
1944      return source().getType().cast<VectorType>();
1945    }
1946    VectorType getResultVectorType() {
1947      return getResult().getType().cast<VectorType>();
1948    }
1949  }];
1950  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)";
1951  let hasFolder = 1;
1952  let hasCanonicalizer = 1;
1953}
1954
1955def Vector_BitCastOp :
1956  Vector_Op<"bitcast", [NoSideEffect, AllRanksMatch<["source", "result"]>]>,
1957    Arguments<(ins AnyVector:$source)>,
1958    Results<(outs AnyVector:$result)>{
1959  let summary = "bitcast casts between vectors";
1960  let description = [{
1961    The bitcast operation casts between vectors of the same rank, the minor 1-D
1962    vector size is casted to a vector with a different element type but same
1963    bitwidth.
1964
1965    Example:
1966
1967    ```mlir
1968    // Example casting to a smaller element type.
1969    %1 = vector.bitcast %0 : vector<5x1x4x3xf32> to vector<5x1x4x6xi16>
1970
1971    // Example casting to a bigger element type.
1972    %3 = vector.bitcast %2 : vector<10x12x8xi8> to vector<10x12x2xi32>
1973
1974    // Example casting to an element type of the same size.
1975    %5 = vector.bitcast %4 : vector<5x1x4x3xf32> to vector<5x1x4x3xi32>
1976    ```
1977  }];
1978  let extraClassDeclaration = [{
1979    VectorType getSourceVectorType() {
1980      return source().getType().cast<VectorType>();
1981    }
1982    VectorType getResultVectorType() {
1983      return getResult().getType().cast<VectorType>();
1984    }
1985  }];
1986  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)";
1987  let hasFolder = 1;
1988}
1989
1990def Vector_TypeCastOp :
1991  Vector_Op<"type_cast", [NoSideEffect, ViewLikeOpInterface]>,
1992    Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>,
1993    Results<(outs AnyMemRef:$result)> {
1994  let summary = "type_cast op converts a scalar memref to a vector memref";
1995  let description = [{
1996    Performs a conversion from a memref with scalar element to a memref with a
1997    *single* vector element, copying the shape of the memref to the vector. This
1998    is the minimal viable operation that is required to makeke
1999    super-vectorization operational. It can be seen as a special case of the
2000    `view` operation but scoped in the super-vectorization context.
2001
2002    Syntax:
2003
2004    ```
2005    operation ::= `vector.type_cast` ssa-use : memref-type to memref-type
2006    ```
2007
2008    Example:
2009
2010    ```mlir
2011    %A  = alloc() : memref<5x4x3xf32>
2012    %VA = vector.type_cast %A : memref<5x4x3xf32> to memref<vector<5x4x3xf32>>
2013    ```
2014  }];
2015
2016  /// Build the canonical memRefType with a single vector.
2017  /// E.g. memref<4 x 5 x vector<6 x f32>> -> memref<vector<4 x 5 x 6 x f32>>.
2018  let builders = [OpBuilder<(ins "Value":$source)>];
2019
2020  let extraClassDeclaration = [{
2021    MemRefType getMemRefType() {
2022      return memref().getType().cast<MemRefType>();
2023    }
2024    MemRefType getResultMemRefType() {
2025      return getResult().getType().cast<MemRefType>();
2026    }
2027    // Implement ViewLikeOpInterface.
2028    Value getViewSource() { return memref(); }
2029  }];
2030
2031  let assemblyFormat = [{
2032    $memref attr-dict `:` type($memref) `to` type($result)
2033  }];
2034}
2035
2036def Vector_ConstantMaskOp :
2037  Vector_Op<"constant_mask", [NoSideEffect]>,
2038    Arguments<(ins I64ArrayAttr:$mask_dim_sizes)>,
2039    Results<(outs VectorOf<[I1]>)> {
2040  let summary = "creates a constant vector mask";
2041  let description = [{
2042    Creates and returns a vector mask where elements of the result vector
2043    are set to '0' or '1', based on whether the element indices are contained
2044    within a hyper-rectangular region specified by the 'mask_dim_sizes'
2045    array attribute argument. Each element of the 'mask_dim_sizes' array,
2046    specifies an exclusive upper bound [0, mask-dim-size-element-value)
2047    for a unique dimension in the vector result. The conjunction of the ranges
2048    define a hyper-rectangular region within which elements values are set to 1
2049    (otherwise element values are set to 0).
2050
2051    Example:
2052
2053    ```mlir
2054    // create a constant vector mask of size 4x3xi1 with elements in range
2055    // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
2056    %1 = vector.constant_mask [3, 2] : vector<4x3xi1>
2057
2058    print %1
2059                  columns
2060                0    1    2
2061              |------------
2062            0 | 1    1    0
2063      rows  1 | 1    1    0
2064            2 | 1    1    0
2065            3 | 0    0    0
2066    ```
2067  }];
2068
2069  let extraClassDeclaration = [{
2070    static StringRef getMaskDimSizesAttrName() { return "mask_dim_sizes"; }
2071  }];
2072  let assemblyFormat = "$mask_dim_sizes attr-dict `:` type(results)";
2073}
2074
2075def Vector_CreateMaskOp :
2076  Vector_Op<"create_mask", [NoSideEffect]>,
2077    Arguments<(ins Variadic<Index>:$operands)>, Results<(outs VectorOf<[I1]>)> {
2078  let summary = "creates a vector mask";
2079  let description = [{
2080    Creates and returns a vector mask where elements of the result vector
2081    are set to '0' or '1', based on whether the element indices are contained
2082    within a hyper-rectangular region specified by the operands. Specifically,
2083    each operand specifies a range [0, operand-value) for a unique dimension in
2084    the vector result. The conjunction of the operand ranges define a
2085    hyper-rectangular region within which elements values are set to 1
2086    (otherwise element values are set to 0).
2087
2088    Example:
2089
2090    ```mlir
2091    // create a vector mask of size 4x3xi1 where elements in range
2092    // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
2093    %1 = vector.create_mask %c3, %c2 : vector<4x3xi1>
2094
2095    print %1
2096                  columns
2097                0    1    2
2098              |------------
2099            0 | 1    1    0
2100      rows  1 | 1    1    0
2101            2 | 1    1    0
2102            3 | 0    0    0
2103    ```
2104  }];
2105
2106  let hasCanonicalizer = 1;
2107  let assemblyFormat = "$operands attr-dict `:` type(results)";
2108}
2109
2110def Vector_TransposeOp :
2111  Vector_Op<"transpose", [NoSideEffect,
2112    PredOpTrait<"operand and result have same element type",
2113                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
2114    Arguments<(ins AnyVector:$vector, I64ArrayAttr:$transp)>,
2115    Results<(outs AnyVector:$result)> {
2116  let summary = "vector transpose operation";
2117  let description = [{
2118    Takes a n-D vector and returns the transposed n-D vector defined by
2119    the permutation of ranks in the n-sized integer array attribute.
2120    In the operation
2121
2122    ```mlir
2123    %1 = vector.transpose %0, [i_1, .., i_n]
2124      : vector<d_1 x .. x d_n x f32>
2125      to vector<d_trans[0] x .. x d_trans[n-1] x f32>
2126    ```
2127
2128    the transp array [i_1, .., i_n] must be a permutation of [0, .., n-1].
2129
2130    Example:
2131
2132    ```mlir
2133    %1 = vector.transpose %0, [1, 0] : vector<2x3xf32> to vector<3x2xf32>
2134
2135     [ [a, b, c],       [ [a, d],
2136       [d, e, f] ]  ->    [b, e],
2137                          [c, f] ]
2138    ```
2139  }];
2140  let builders = [
2141    OpBuilder<(ins "Value":$vector, "ArrayRef<int64_t>":$transp)>
2142  ];
2143  let extraClassDeclaration = [{
2144    VectorType getVectorType() {
2145      return vector().getType().cast<VectorType>();
2146    }
2147    VectorType getResultType() {
2148      return result().getType().cast<VectorType>();
2149    }
2150    void getTransp(SmallVectorImpl<int64_t> &results);
2151    static StringRef getTranspAttrName() { return "transp"; }
2152  }];
2153  let assemblyFormat = [{
2154    $vector `,` $transp attr-dict `:` type($vector) `to` type($result)
2155  }];
2156  let hasCanonicalizer = 1;
2157  let hasFolder = 1;
2158}
2159
2160def Vector_PrintOp :
2161  Vector_Op<"print", []>, Arguments<(ins AnyType:$source)> {
2162  let summary = "print operation (for testing and debugging)";
2163  let description = [{
2164    Prints the source vector (or scalar) to stdout in human readable
2165    format (for testing and debugging). No return value.
2166
2167    Example:
2168
2169    ```mlir
2170    %0 = constant 0.0 : f32
2171    %1 = vector.broadcast %0 : f32 to vector<4xf32>
2172    vector.print %1 : vector<4xf32>
2173
2174    when lowered to LLVM, the vector print is unrolled into
2175    elementary printing method calls that at runtime will yield
2176
2177    ( 0.0, 0.0, 0.0, 0.0 )
2178
2179    on stdout when linked with a small runtime support library,
2180    which only needs to provide a few printing methods (single
2181    value for all data types, opening/closing bracket, comma,
2182    newline).
2183    ```
2184  }];
2185  let verifier = ?;
2186  let extraClassDeclaration = [{
2187    Type getPrintType() {
2188      return source().getType();
2189    }
2190  }];
2191  let assemblyFormat = "$source attr-dict `:` type($source)";
2192}
2193
2194//===----------------------------------------------------------------------===//
2195// Ops used for supporting progressive lowering and conversion type changes.
2196// The Ops are typically not used directly by higher level dialects, but are
2197// used by intra-dialect rewriting rules to bring vector operations closer
2198// to the hardware ISA.
2199//===----------------------------------------------------------------------===//
2200
2201/// Vector dialect matrix multiplication op that operates on flattened 1-D
2202/// MLIR vectors. This is the counterpart of llvm.matrix.multiply in MLIR.
2203/// This may seem redundant with vector.contract but it serves the purposes of
2204/// more progressive lowering and localized type conversion on the path:
2205///   `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`.
2206def Vector_MatmulOp : Vector_Op<"matrix_multiply", [NoSideEffect,
2207        PredOpTrait<"lhs operand and result have same element type",
2208                    TCresVTEtIsSameAsOpBase<0, 0>>,
2209        PredOpTrait<"rhs operand and result have same element type",
2210                    TCresVTEtIsSameAsOpBase<0, 1>>]>,
2211      Arguments<(
2212        // TODO: tighten vector element types that make sense.
2213        ins VectorOfRankAndType<[1],
2214              [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$lhs,
2215            VectorOfRankAndType<[1],
2216              [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$rhs,
2217            I32Attr:$lhs_rows, I32Attr:$lhs_columns, I32Attr:$rhs_columns)>,
2218      Results<(
2219        outs VectorOfRankAndType<[1],
2220               [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$res)>
2221{
2222  let summary = "Vector matrix multiplication op that operates on flattened 1-D"
2223    " MLIR vectors";
2224  let description = [{
2225    This is the counterpart of llvm.matrix.multiply in MLIR. It serves the
2226    purposes of more progressive lowering and localized type conversion.
2227    Higher levels typically lower matrix multiplications into 'vector.contract'
2228    operations. Subsequent rewriting rule progressively lower these operations
2229    into 'vector.matrix_multiply' operations to bring the operations closer
2230    to the hardware ISA.
2231
2232    The ‘vector.matrix_multiply’ op treats `lhs` as matrix with <lhs_rows> rows
2233    and <lhs_columns> columns, `rhs` as matrix with <lhs_columns> rows and
2234    <rhs_columns> and multiplies them. The result matrix is returned embedded in
2235    the result vector.
2236
2237    Also see:
2238
2239    http://llvm.org/docs/LangRef.html#llvm-matrix-multiply-intrinsic
2240
2241    Example:
2242
2243    ```mlir
2244    %C = vector.matrix_multiply %A, %B
2245      { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } :
2246      (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
2247    ```
2248  }];
2249  let builders = [
2250   OpBuilder<(ins "Value":$lhs, "Value":$rhs, "unsigned":$lhsRows,
2251     "unsigned":$lhsColumns, "unsigned":$rhsColumns),
2252   [{
2253     $_state.addOperands({lhs, rhs});
2254     $_state.addAttribute("lhs_rows",$_builder.getI32IntegerAttr(lhsRows));
2255     $_state.addAttribute("lhs_columns",$_builder.getI32IntegerAttr(lhsColumns));
2256     $_state.addAttribute("rhs_columns",$_builder.getI32IntegerAttr(rhsColumns));
2257     $_state.addTypes(VectorType::get(lhsRows * rhsColumns,
2258       lhs.getType().cast<VectorType>().getElementType()));
2259   }]>,
2260  ];
2261  let verifier = ?;
2262  let assemblyFormat = "$lhs `,` $rhs attr-dict "
2263    "`:` `(` type($lhs) `,` type($rhs) `)` `->` type($res)";
2264}
2265
2266/// Vector dialect matrix tranposition op that operates on flattened 1-D
2267/// MLIR vectors. This is the counterpart of llvm.matrix.transpose in MLIR.
2268/// This may seem redundant with vector.transpose but it serves the purposes of
2269/// more progressive lowering and localized type conversion on the path:
2270///   `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`.
2271def Vector_FlatTransposeOp : Vector_Op<"flat_transpose", [NoSideEffect,
2272  PredOpTrait<"source operand and result have same element type",
2273                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
2274    Arguments<(
2275      // TODO: tighten vector element types that make sense.
2276      ins VectorOfRankAndType<[1],
2277            [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$matrix,
2278          I32Attr:$rows, I32Attr:$columns)>,
2279    Results<(
2280      outs VectorOfRankAndType<[1],
2281             [AnySignlessInteger, AnySignedInteger, Index, AnyFloat]>:$res)> {
2282  let summary = "Vector matrix transposition on flattened 1-D MLIR vectors";
2283  let description = [{
2284    This is the counterpart of llvm.matrix.transpose in MLIR. It serves
2285    the purposes of more progressive lowering and localized type conversion.
2286    Higher levels typically lower matrix tranpositions into 'vector.transpose'
2287    operations. Subsequent rewriting rule progressively lower these operations
2288    into 'vector.flat_transpose' operations to bring the operations closer
2289    to the hardware ISA.
2290
2291    The ‘vector.flat_transpose’ op treats the 1-D input `matrix` as
2292    a 2-D matrix with <rows> rows and <columns> columns, and returns the
2293    transposed matrix in flattened form in 'res'.
2294
2295    Also see:
2296
2297    http://llvm.org/docs/LangRef.html#llvm-matrix-transpose-intrinsic
2298
2299    Example:
2300
2301    ```mlir
2302    %1 = vector.flat_transpose %0 { rows = 4: i32, columns = 4: i32 }
2303       : (vector<16xf32>) -> vector<16xf32>
2304    ```
2305  }];
2306  let verifier = ?;
2307  let assemblyFormat = "$matrix attr-dict `:` type($matrix) `->` type($res)";
2308}
2309
2310#endif // VECTOR_OPS
2311