1// RUN: mlir-opt %s -split-input-file -linalg-fold-unit-extent-dims | FileCheck %s
2
3#accesses = [
4  affine_map<(i, j, k, l, m) -> (i, k, m)>,
5  affine_map<(i, j, k, l, m) -> ()>,
6  affine_map<(i, j, k, l, m) -> (i, k, j, l, m)>
7]
8
9#trait = {
10  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
11  indexing_maps = #accesses,
12  library_call = "some_external_func"
13}
14
15func @drop_one_trip_loops(%arg0 : tensor<?x1x?xf32>, %arg1 : f32, %shape: tensor<?x1x?x1x?xf32>) -> tensor<?x1x?x1x?xf32> {
16  %0 = linalg.generic #trait
17     ins(%arg0, %arg1 : tensor<?x1x?xf32>, f32)
18    outs(%shape : tensor<?x1x?x1x?xf32>) {
19       ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) :
20         linalg.yield %arg3 : f32
21       } -> tensor<?x1x?x1x?xf32>
22  return %0 : tensor<?x1x?x1x?xf32>
23}
24//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
25//   CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2) -> ()>
26//   CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
27// CHECK-LABEL: func @drop_one_trip_loops
28//       CHECK: linalg.tensor_collapse_shape %{{.*}} {{\[}}[0, 1], [2]]
29//       CHECK: linalg.generic
30//  CHECK-SAME:   indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP3]]]
31//  CHECK-SAME:   iterator_types = ["parallel", "parallel", "parallel"]
32//       CHECK: linalg.tensor_expand_shape %{{.*}} {{\[}}[0, 1], [2, 3], [4]]
33
34// -----
35
36#accesses = [
37  affine_map<(i, j, k, l, m) -> (i, k, m)>,
38  affine_map<(i, j, k, l, m) -> (i, k, j, l, m)>
39]
40
41#trait = {
42  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
43  indexing_maps = #accesses,
44  library_call = "some_external_func"
45}
46
47func @drop_one_trip_loops_indexed
48  (%arg0 : tensor<?x1x?xi32>, %shape: tensor<?x1x?x1x?xi32>) -> tensor<?x1x?x1x?xi32>
49{
50  %0 = linalg.generic #trait
51     ins(%arg0 : tensor<?x1x?xi32>)
52    outs(%shape: tensor<?x1x?x1x?xi32>) {
53       ^bb0(%arg6 : i32, %arg7 : i32) :
54         %idx0 = linalg.index 0 : index
55         %idx1 = linalg.index 1 : index
56         %idx2 = linalg.index 2 : index
57         %idx3 = linalg.index 3 : index
58         %idx4 = linalg.index 4 : index
59         %1 = addi %idx0, %idx1 : index
60         %2 = subi %1, %idx2 : index
61         %3 = subi %2, %idx3 : index
62         %4 = addi %3, %idx4 : index
63         %5 = index_cast %4 : index to i32
64         %6 = addi %5, %arg6 : i32
65         linalg.yield %6 : i32
66       } -> tensor<?x1x?x1x?xi32>
67  return %0 : tensor<?x1x?x1x?xi32>
68}
69// The subtractions disappear the access map of the output tensor maps its unit
70// dimensions 1 and 3 to the index dimensions 2 and 3.
71// CHECK-LABEL: func @drop_one_trip_loops_indexed
72//       CHECK:   linalg.generic
73//       CHECK:   ^{{.+}}(
74//  CHECK-SAME:     %[[ARG4:[a-zA-Z0-9]+]]: i32, %{{.*}}: i32)
75//       CHECK:     %[[IDX0:.+]] = linalg.index 0 : index
76//       CHECK:     %[[IDX1:.+]] = linalg.index 1 : index
77//       CHECK:     %[[IDX2:.+]] = linalg.index 2 : index
78//       CHECK:     %[[T3:.+]] = addi %[[IDX0]], %[[IDX1]]
79//       CHECK:     %[[T4:.+]] = addi %[[T3]], %[[IDX2]]
80//       CHECK:     %[[T5:.+]] = index_cast %[[T4]] : index to i32
81//       CHECK:     %[[T6:.+]] = addi %[[T5]], %[[ARG4]] : i32
82//       CHECK:     linalg.yield %[[T6]] : i32
83
84// -----
85
86#map0 = affine_map<(i, j) -> (i, j)>
87#access = [#map0, #map0]
88#trait = {
89  iterator_types = ["parallel", "parallel"],
90  indexing_maps = #access,
91  library_call = "some_external_func"
92}
93
94func @drop_all_loops(%arg0 : tensor<1x1xf32>) -> tensor<1x1xf32>
95{
96  %0 = linalg.generic #trait
97     ins(%arg0 : tensor<1x1xf32>)
98    outs(%arg0 : tensor<1x1xf32>) {
99       ^bb0(%arg1: f32, %arg2: f32) :
100         linalg.yield %arg1 : f32
101       } -> tensor<1x1xf32>
102  return %0 : tensor<1x1xf32>
103}
104//       CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
105// CHECK-LABEL: func @drop_all_loops
106//       CHECK:   linalg.tensor_collapse_shape %{{.*}} []
107//       CHECK:   linalg.generic
108//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP0]]]
109//  CHECK-SAME:     iterator_types = []
110
111// -----
112
113#map0 = affine_map<(i, j) -> (i, j)>
114#access = [#map0, #map0]
115#trait = {
116  iterator_types = ["parallel", "parallel"],
117  indexing_maps = #access,
118  library_call = "some_external_func"
119}
120
121func @drop_all_loops_indexed
122  (%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32>{
123  %0 = linalg.generic #trait
124     ins(%arg0 : tensor<1x1xi32>)
125    outs(%arg0 : tensor<1x1xi32>) {
126       ^bb0(%arg3: i32, %arg4: i32) :
127         %idx0 = linalg.index 0 : index
128         %idx1 = linalg.index 1 : index
129         %1 = addi %idx0, %idx1 : index
130         %2 = index_cast %1 : index to i32
131         %3 = addi %2, %arg3 : i32
132         linalg.yield %3 : i32
133       } -> tensor<1x1xi32>
134  return %0 : tensor<1x1xi32>
135}
136
137// CHECK-LABEL: func @drop_all_loops_indexed
138//       CHECK:   linalg.generic
139//       CHECK:   ^{{.+}}(%[[ARG1:.+]]: i32, %[[ARG2:.+]]: i32)
140//       CHECK:     linalg.yield %[[ARG1]] : i32
141
142// -----
143
144#accesses = [
145  affine_map<(d0) -> (0, d0)>,
146  affine_map<(d0) -> (d0)>
147]
148
149#trait = {
150  indexing_maps = #accesses,
151  iterator_types = ["parallel"],
152  library_call = "some_external_fn"
153}
154
155func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> {
156  %0 = linalg.generic #trait
157     ins(%arg0 : tensor<1x5xf32>)
158    outs(%shape : tensor<5xf32>) {
159  ^bb0(%arg2: f32, %arg3: f32):     // no predecessors
160    linalg.yield %arg2 : f32
161  } -> tensor<5xf32>
162  return %0 : tensor<5xf32>
163}
164//   CHECK: #[[$MAP1:.*]] = affine_map<(d0) -> (d0)>
165
166// CHECK-LABEL: func @leading_dim_1_canonicalization
167//       CHECK:   linalg.tensor_collapse_shape %{{.*}} {{\[}}[0, 1]]
168//       CHECK:   linalg.generic
169//  CHECK-SAME:     indexing_maps = [#[[$MAP1]], #[[$MAP1]]]
170//  CHECK-SAME:     iterator_types = ["parallel"]
171
172// -----
173
174#accesses = [
175  affine_map<(d0, d1) -> (0, d1)>,
176  affine_map<(d0, d1) -> (d0, 0)>,
177  affine_map<(d0, d1) -> (d0, d1)>
178]
179
180#trait = {
181  indexing_maps = #accesses,
182  iterator_types = ["parallel", "parallel"],
183  library_call = "some_external_fn"
184}
185
186func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>, %shape : tensor<5x5xf32>) -> tensor<5x5xf32>
187{
188  %0 = linalg.tensor_expand_shape %arg0 [[0, 1]] : tensor<5xf32> into tensor<1x5xf32>
189  %1 = linalg.tensor_expand_shape %arg1 [[0, 1]] : tensor<5xf32> into tensor<5x1xf32>
190  %2 = linalg.generic #trait
191     ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>)
192    outs(%shape : tensor<5x5xf32>) {
193       ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
194         %3 = addf %arg3, %arg4 : f32
195         linalg.yield %3 : f32
196       } -> tensor<5x5xf32>
197  return %2 : tensor<5x5xf32>
198}
199//   CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
200//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
201//   CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
202// CHECK-LABEL: func @broadcast_test
203//   CHECK-NOT:   linalg.tensor_{{.*}}shape
204//       CHECK:   linalg.generic
205//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
206//  CHECK-SAME:     iterator_types = ["parallel", "parallel"]
207//   CHECK-NOT:   linalg.tensor_{{.*}}shape
208
209// -----
210
211#accesses = [
212  affine_map<(d0, d1) -> (0, 0)>,
213  affine_map<(d0, d1) -> (d0, d1)>
214]
215
216#trait = {
217  indexing_maps = #accesses,
218  iterator_types = ["parallel", "parallel"],
219  library_call = "some_external_fn"
220}
221
222func @broadcast_scalar(%arg0 : tensor<1x1xf32>, %shape : tensor<?x?xf32>) -> tensor<?x?xf32>
223{
224   %0 = linalg.generic #trait
225     ins(%arg0 : tensor<1x1xf32>)
226    outs(%shape : tensor<?x?xf32>) {
227      ^bb0(%arg2 : f32, %arg3 : f32):
228        linalg.yield %arg2 : f32
229   } -> tensor<?x?xf32>
230   return %0 : tensor<?x?xf32>
231}
232//   CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> ()>
233//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
234// CHECK-LABEL: func @broadcast_scalar
235//  CHECK-SAME:   %[[ARG0:.*]]: tensor<1x1xf32>
236//       CHECK:   %[[A:.*]] = linalg.tensor_collapse_shape %[[ARG0]] []
237//  CHECK-SAME:     tensor<1x1xf32> into tensor<f32>
238//       CHECK:   linalg.generic
239//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
240//  CHECK-SAME:     iterator_types = ["parallel", "parallel"]
241//  CHECK-SAME:     %[[A]]
242
243// -----
244
245#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
246#map1 = affine_map<(d0, d1, d2) -> (d2)>
247func @fold_unit_dim_tensor_reshape_op(%arg0 : tensor<5xf32>) -> tensor<2x5xf32>
248{
249  %1 = linalg.init_tensor [1, 2, 5] : tensor<1x2x5xf32>
250  %2 = linalg.generic {i64, indexing_maps = [#map1, #map0],
251    iterator_types = ["parallel", "parallel", "parallel"]}
252    ins(%arg0 : tensor<5xf32>) outs(%1 : tensor<1x2x5xf32>) {
253    ^bb0(%arg1: f32, %arg2: f32):  // no predecessors
254      linalg.yield %arg1 : f32
255    } -> tensor<1x2x5xf32>
256  %3 = linalg.tensor_collapse_shape %2 [[0, 1], [2]]
257    : tensor<1x2x5xf32> into tensor<2x5xf32>
258  return %3 : tensor<2x5xf32>
259}
260// CHECK-LABEL: func @fold_unit_dim_tensor_reshape_op
261//       CHECK:   %[[RESULT:.+]] = linalg.generic
262//       CHECK:   return %[[RESULT]]
263
264// -----
265
266func @fold_unit_dim_for_init_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> {
267  %cst = constant 0.0 : f32
268  %init = linalg.init_tensor [1] : tensor<1xf32>
269  %fill = linalg.fill(%cst, %init) : f32, tensor<1xf32> -> tensor<1xf32>
270  %add = linalg.generic {
271      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
272      iterator_types = ["parallel", "reduction"]}
273    ins(%input : tensor<1x1000xf32>)outs(%fill : tensor<1xf32>) {
274  ^bb0(%arg1: f32, %arg2: f32):
275    %1823 = addf %arg1, %arg2 : f32
276    linalg.yield %1823 : f32
277  } -> tensor<1xf32>
278  return %add : tensor<1xf32>
279}
280
281
282//   CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0)>
283//   CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ()>
284
285//       CHECK: func @fold_unit_dim_for_init_tensor
286
287
288//       CHECK: %[[INPUT_RESHAPE:.+]] = linalg.tensor_collapse_shape %{{.+}} {{\[}}[0, 1]] : tensor<1x1000xf32> into tensor<1000xf32>
289//       CHECK: %[[INIT:.+]] = linalg.init_tensor [] : tensor<f32>
290//       CHECK: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor<f32> -> tensor<f32>
291//       CHECK: %[[GENERIC:.+]] = linalg.generic
292//  CHECK-SAME:     indexing_maps = [#[[MAP1]], #[[MAP2]]]
293//  CHECK-SAME:     iterator_types = ["reduction"]
294//  CHECK-SAME:   ins(%[[INPUT_RESHAPE]] : tensor<1000xf32>)
295//  CHECK-SAME:   outs(%[[FILL]] : tensor<f32>)
296//       CHECK: %[[GENERIC_RESHAPE:.+]] = linalg.tensor_expand_shape %[[GENERIC]] [] : tensor<f32> into tensor<1xf32>
297//       CHECK: return %[[GENERIC_RESHAPE:.+]] : tensor<1xf32>
298
299
300// -----
301
302func @fold_slice(
303    %arg0 : tensor<1x?x?x1x?x1x1xf32>, %arg1 : tensor<1x?x?x?x?x1x1xf32>,
304    %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index,
305    %arg6 : index, %arg7 : index) -> (tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>) {
306  %0 = tensor.extract_slice %arg0[0, %arg2, %arg3, 0, %arg4, 0, 0]
307                             [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
308      tensor<1x?x?x1x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32>
309  %1 = tensor.extract_slice %arg1[%arg2, 0, %arg3, 0, 0, %arg4, 0]
310                             [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
311      tensor<1x?x?x?x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32>
312  return %0, %1 : tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>
313}
314//      CHECK: func @fold_slice
315// CHECK-SAME:   %[[ARG0:.+]]: tensor<1x?x?x1x?x1x1xf32>
316// CHECK-SAME:   %[[ARG1:.+]]: tensor<1x?x?x?x?x1x1xf32>
317//      CHECK:   %[[SLICE1:.+]] = tensor.extract_slice %[[ARG0]]
318// CHECK-SAME:       to tensor<?x?x?xf32>
319//      CHECK:   %[[RESULT1:.+]] = linalg.tensor_expand_shape %[[SLICE1]]
320// CHECK-SAME:       [0, 1], [2], [3, 4, 5, 6]
321//      CHECK:   %[[SLICE2:.+]] = tensor.extract_slice %[[ARG1]]
322// CHECK-SAME:       to tensor<?x?x?xf32>
323//      CHECK:   %[[RESULT2:.+]] = linalg.tensor_expand_shape %[[SLICE2]]
324// CHECK-SAME:       [0, 1], [2], [3, 4, 5, 6]
325//      CHECK:   return %[[RESULT1]], %[[RESULT2]]
326
327// -----
328
329func @unit_dim_for_reduction(%arg0: tensor<1x?x1x?xf32>) -> tensor<1x?xf32> {
330  %cst = constant 1.000000e+00 : f32
331  %c3 = constant 3 : index
332  %0 = tensor.dim %arg0, %c3 : tensor<1x?x1x?xf32>
333  %1 = linalg.init_tensor [1, %0] : tensor<1x?xf32>
334  %2 = linalg.fill(%cst, %1) : f32, tensor<1x?xf32> -> tensor<1x?xf32>
335  %3 = linalg.generic {
336    indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
337                     affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
338    iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
339    ins(%arg0 : tensor<1x?x1x?xf32>)
340    outs(%2 : tensor<1x?xf32>) {
341  ^bb0(%arg1: f32, %arg2: f32):  // no predecessors
342    %4 = addf %arg1, %arg2 : f32
343    linalg.yield %4 : f32
344  } -> tensor<1x?xf32>
345  return %3 : tensor<1x?xf32>
346}
347//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0, d1)>
348//  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d0)>
349//      CHECK: func @unit_dim_for_reduction
350// CHECK-SAME:   %[[ARG0:.+]]: tensor<1x?x1x?xf32>
351//  CHECK-DAG:   %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]]
352//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xf32>
353//      CHECK:   %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
354//      CHECK:   %[[RESULT:.+]] = linalg.generic
355// CHECK-SAME:     indexing_maps = [#[[MAP2]], #[[MAP3]]]
356// CHECK-SAME:     iterator_types = ["parallel", "reduction"]
357// CHECK-SAME:     ins(%[[RESHAPE]] : tensor<?x?xf32>)
358// CHECK-SAME:     outs(%[[FILL]] : tensor<?xf32>)
359//      CHECK:   %[[RESULT_RESHAPE:.+]] = linalg.tensor_expand_shape %[[RESULT]] {{\[}}[0, 1]]
360//      CHECK:   return %[[RESULT_RESHAPE]]
361
362// -----
363
364func @unit_dim_for_reduction_keep_one(%arg0: tensor<1x?x1x1xf32>) -> tensor<1x1xf32> {
365  %cst = constant 1.000000e+00 : f32
366  %c3 = constant 3 : index
367  %1 = linalg.init_tensor [1, 1] : tensor<1x1xf32>
368  %2 = linalg.fill(%cst, %1) : f32, tensor<1x1xf32> -> tensor<1x1xf32>
369  %3 = linalg.generic {
370    indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
371                     affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
372    iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
373    ins(%arg0 : tensor<1x?x1x1xf32>)
374    outs(%2 : tensor<1x1xf32>) {
375  ^bb0(%arg1: f32, %arg2: f32):  // no predecessors
376    %4 = addf %arg1, %arg2 : f32
377    linalg.yield %4 : f32
378  } -> tensor<1x1xf32>
379  return %3 : tensor<1x1xf32>
380}
381//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0, d1)>
382//  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d0)>
383//      CHECK: func @unit_dim_for_reduction_keep_one
384// CHECK-SAME:   %[[ARG0:.+]]: tensor<1x?x1x1xf32>
385//  CHECK-DAG:   %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]]
386//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [1] : tensor<1xf32>
387//      CHECK:   %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
388//      CHECK:   %[[RESULT:.+]] = linalg.generic
389// CHECK-SAME:     indexing_maps = [#[[MAP2]], #[[MAP3]]]
390// CHECK-SAME:     iterator_types = ["parallel", "reduction"]
391// CHECK-SAME:     ins(%[[RESHAPE]] : tensor<?x1xf32>)
392// CHECK-SAME:     outs(%[[FILL]] : tensor<1xf32>)
393//      CHECK:   %[[RESULT_RESHAPE:.+]] = linalg.tensor_expand_shape %[[RESULT]] {{\[}}[0, 1]]
394//      CHECK:   return %[[RESULT_RESHAPE]]
395
396// -----
397
398func @unit_dim_for_reduction_inner(%arg0: tensor<?x1x?x1xf32>) -> tensor<?x1xf32> {
399  %cst = constant 1.000000e+00 : f32
400  %c2 = constant 2 : index
401  %0 = tensor.dim %arg0, %c2 : tensor<?x1x?x1xf32>
402  %1 = linalg.init_tensor [%0, 1] : tensor<?x1xf32>
403  %2 = linalg.fill(%cst, %1) : f32, tensor<?x1xf32> -> tensor<?x1xf32>
404  %3 = linalg.generic {
405    indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
406                     affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
407    iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
408    ins(%arg0 : tensor<?x1x?x1xf32>)
409    outs(%2 : tensor<?x1xf32>) {
410  ^bb0(%arg1: f32, %arg2: f32):  // no predecessors
411    %4 = addf %arg1, %arg2 : f32
412    linalg.yield %4 : f32
413  } -> tensor<?x1xf32>
414  return %3 : tensor<?x1xf32>
415}
416//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0, d1)>
417//  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d0)>
418//      CHECK: func @unit_dim_for_reduction_inner
419// CHECK-SAME:   %[[ARG0:.+]]: tensor<?x1x?x1xf32>
420//  CHECK-DAG:   %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]] {{\[}}[0, 1], [2, 3]]
421//      CHECK:   %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xf32>
422//      CHECK:   %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
423//      CHECK:   %[[RESULT:.+]] = linalg.generic
424// CHECK-SAME:     indexing_maps = [#[[MAP2]], #[[MAP3]]]
425// CHECK-SAME:     iterator_types = ["parallel", "reduction"]
426// CHECK-SAME:     ins(%[[RESHAPE]] : tensor<?x?xf32>)
427// CHECK-SAME:     outs(%[[FILL]] : tensor<?xf32>)
428//      CHECK:   %[[RESULT_RESHAPE:.+]] = linalg.tensor_expand_shape %[[RESULT]] {{\[}}[0, 1]]
429//      CHECK:   return %[[RESULT_RESHAPE]]
430
431// -----
432
433func @slice_unit_dims(%arg0: tensor<1x3xf32>) -> tensor<1x1xf32> {
434  %0 = tensor.extract_slice %arg0[0, 2] [1, 1] [1, 1] : tensor<1x3xf32> to tensor<1x1xf32>
435  return %0 : tensor<1x1xf32>
436}
437// CHECK-LABEL: func @slice_unit_dims
438//       CHECK:   %[[SLICE:.+]] = tensor.extract_slice
439//  CHECK-SAME:     tensor<1x3xf32> to tensor<f32>
440//       CHECK:   %[[RESULT:.+]] = linalg.tensor_expand_shape %[[SLICE]] []
441//       CHECK:   return %[[RESULT]]
442
443// -----
444
445func @insert_slice_unit_dims(%arg0: tensor<1x3xf32>, %arg1: tensor<1x1xf32>) -> tensor<1x3xf32> {
446  %0 = tensor.insert_slice %arg1 into %arg0[0, 2] [1, 1] [1, 1] : tensor<1x1xf32> into tensor<1x3xf32>
447  return %0 : tensor<1x3xf32>
448}
449// CHECK-LABEL: func @insert_slice_unit_dims
450//       CHECK:   %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %{{.+}} []
451//       CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[RESHAPE]]
452//  CHECK-SAME:     tensor<f32> into tensor<1x3xf32>
453//       CHECK:   return %[[RESULT]]
454
455// -----
456
457#accesses = [
458  affine_map<(i, j, k, l, m) -> (i, k, m)>,
459  affine_map<(i, j, k, l, m) -> ()>,
460  affine_map<(i, j, k, l, m) -> (i, k, j, l, m)>
461]
462
463#trait = {
464  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
465  indexing_maps = #accesses,
466  library_call = "some_external_func"
467}
468
469func @drop_one_trip_loops(%arg0 : memref<?x1x?xf32>, %arg1 : f32, %shape: memref<?x1x?x1x?xf32>) -> memref<?x1x?x1x?xf32> {
470  linalg.generic #trait
471     ins(%arg0, %arg1 : memref<?x1x?xf32>, f32)
472    outs(%shape : memref<?x1x?x1x?xf32>) {
473       ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) :
474         linalg.yield %arg3 : f32
475       }
476  return %shape : memref<?x1x?x1x?xf32>
477}
478//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
479//   CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2) -> ()>
480//   CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
481// CHECK-LABEL: func @drop_one_trip_loops
482//       CHECK: memref.collapse_shape %{{.*}} {{\[}}[0, 1], [2]]
483//       CHECK: linalg.generic
484//  CHECK-SAME:   indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP3]]]
485//  CHECK-SAME:   iterator_types = ["parallel", "parallel", "parallel"]
486
487// -----
488
489#accesses = [
490  affine_map<(i, j, k, l, m) -> (i, k, m)>,
491  affine_map<(i, j, k, l, m) -> (i, k, j, l, m)>
492]
493
494#trait = {
495  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
496  indexing_maps = #accesses,
497  library_call = "some_external_func"
498}
499
500func @drop_one_trip_loops_indexed
501  (%arg0 : memref<?x1x?xi32>, %shape: memref<?x1x?x1x?xi32>) -> memref<?x1x?x1x?xi32>
502{
503  linalg.generic #trait
504     ins(%arg0 : memref<?x1x?xi32>)
505    outs(%shape: memref<?x1x?x1x?xi32>) {
506       ^bb0(%arg6 : i32, %arg7 : i32) :
507         %idx0 = linalg.index 0 : index
508         %idx1 = linalg.index 1 : index
509         %idx2 = linalg.index 2 : index
510         %idx3 = linalg.index 3 : index
511         %idx4 = linalg.index 4 : index
512         %1 = addi %idx0, %idx1 : index
513         %2 = subi %1, %idx2 : index
514         %3 = subi %2, %idx3 : index
515         %4 = addi %3, %idx4 : index
516         %5 = index_cast %4 : index to i32
517         %6 = addi %5, %arg6 : i32
518         linalg.yield %6 : i32
519       }
520  return %shape : memref<?x1x?x1x?xi32>
521}
522// The subtractions disappear the access map of the output memref maps its unit
523// dimensions 1 and 3 to the index dimensions 2 and 3.
524// CHECK-LABEL: func @drop_one_trip_loops_indexed
525//       CHECK:   linalg.generic
526//       CHECK:   ^{{.+}}(
527//  CHECK-SAME:     %[[ARG4:[a-zA-Z0-9]+]]: i32, %{{.*}}: i32)
528//       CHECK:     %[[IDX0:.+]] = linalg.index 0 : index
529//       CHECK:     %[[IDX1:.+]] = linalg.index 1 : index
530//       CHECK:     %[[IDX2:.+]] = linalg.index 2 : index
531//       CHECK:     %[[T3:.+]] = addi %[[IDX0]], %[[IDX1]]
532//       CHECK:     %[[T4:.+]] = addi %[[T3]], %[[IDX2]]
533//       CHECK:     %[[T5:.+]] = index_cast %[[T4]] : index to i32
534//       CHECK:     %[[T6:.+]] = addi %[[T5]], %[[ARG4]] : i32
535//       CHECK:     linalg.yield %[[T6]] : i32
536
537// -----
538
539#map0 = affine_map<(i, j) -> (i, j)>
540#access = [#map0, #map0]
541#trait = {
542  iterator_types = ["parallel", "parallel"],
543  indexing_maps = #access,
544  library_call = "some_external_func"
545}
546
547func @drop_all_loops(%arg0 : memref<1x1xf32>) -> memref<1x1xf32>
548{
549  linalg.generic #trait
550     ins(%arg0 : memref<1x1xf32>)
551    outs(%arg0 : memref<1x1xf32>) {
552       ^bb0(%arg1: f32, %arg2: f32) :
553         linalg.yield %arg1 : f32
554       }
555  return %arg0 : memref<1x1xf32>
556}
557//       CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
558// CHECK-LABEL: func @drop_all_loops
559//       CHECK:   memref.collapse_shape %{{.*}} []
560//       CHECK:   linalg.generic
561//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP0]]]
562//  CHECK-SAME:     iterator_types = []
563
564// -----
565
566#map0 = affine_map<(i, j) -> (i, j)>
567#access = [#map0, #map0]
568#trait = {
569  iterator_types = ["parallel", "parallel"],
570  indexing_maps = #access,
571  library_call = "some_external_func"
572}
573
574func @drop_all_loops_indexed
575  (%arg0 : memref<1x1xi32>) -> memref<1x1xi32>{
576  linalg.generic #trait
577     ins(%arg0 : memref<1x1xi32>)
578    outs(%arg0 : memref<1x1xi32>) {
579       ^bb0(%arg3: i32, %arg4: i32) :
580         %idx0 = linalg.index 0 : index
581         %idx1 = linalg.index 1 : index
582         %1 = addi %idx0, %idx1 : index
583         %2 = index_cast %1 : index to i32
584         %3 = addi %2, %arg3 : i32
585         linalg.yield %3 : i32
586       }
587  return %arg0 : memref<1x1xi32>
588}
589
590// CHECK-LABEL: func @drop_all_loops_indexed
591//       CHECK:   linalg.generic
592//       CHECK:   ^{{.+}}(%[[ARG1:.+]]: i32, %[[ARG2:.+]]: i32)
593//       CHECK:     linalg.yield %[[ARG1]] : i32
594
595// -----
596
597#accesses = [
598  affine_map<(d0) -> (0, d0)>,
599  affine_map<(d0) -> (d0)>
600]
601
602#trait = {
603  indexing_maps = #accesses,
604  iterator_types = ["parallel"],
605  library_call = "some_external_fn"
606}
607
608func @leading_dim_1_canonicalization(%arg0: memref<1x5xf32>, %shape: memref<5xf32>) -> memref<5xf32> {
609  linalg.generic #trait
610     ins(%arg0 : memref<1x5xf32>)
611    outs(%shape : memref<5xf32>) {
612  ^bb0(%arg2: f32, %arg3: f32):     // no predecessors
613    linalg.yield %arg2 : f32
614  }
615  return %shape : memref<5xf32>
616}
617//   CHECK: #[[$MAP1:.*]] = affine_map<(d0) -> (d0)>
618
619// CHECK-LABEL: func @leading_dim_1_canonicalization
620//       CHECK:   memref.collapse_shape %{{.*}} {{\[}}[0, 1]]
621//       CHECK:   linalg.generic
622//  CHECK-SAME:     indexing_maps = [#[[$MAP1]], #[[$MAP1]]]
623//  CHECK-SAME:     iterator_types = ["parallel"]
624
625// -----
626
627#accesses = [
628  affine_map<(d0, d1) -> (0, d1)>,
629  affine_map<(d0, d1) -> (d0, 0)>,
630  affine_map<(d0, d1) -> (d0, d1)>
631]
632
633#trait = {
634  indexing_maps = #accesses,
635  iterator_types = ["parallel", "parallel"],
636  library_call = "some_external_fn"
637}
638
639func @broadcast_test(%arg0 : memref<5xf32>, %arg1 : memref<5xf32>, %shape : memref<5x5xf32>) -> memref<5x5xf32>
640{
641  %0 = memref.expand_shape %arg0 [[0, 1]] : memref<5xf32> into memref<1x5xf32>
642  %1 = memref.expand_shape %arg1 [[0, 1]] : memref<5xf32> into memref<5x1xf32>
643  linalg.generic #trait
644     ins(%0, %1 : memref<1x5xf32>, memref<5x1xf32>)
645    outs(%shape : memref<5x5xf32>) {
646       ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
647         %3 = addf %arg3, %arg4 : f32
648         linalg.yield %3 : f32
649       }
650  return %shape : memref<5x5xf32>
651}
652//   CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
653//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
654//   CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
655// CHECK-LABEL: func @broadcast_test
656//   CHECK-NOT:   linalg.memref_{{.*}}shape
657//       CHECK:   linalg.generic
658//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
659//  CHECK-SAME:     iterator_types = ["parallel", "parallel"]
660//   CHECK-NOT:   linalg.memref_{{.*}}shape
661
662// -----
663
664#accesses = [
665  affine_map<(d0, d1) -> (0, 0)>,
666  affine_map<(d0, d1) -> (d0, d1)>
667]
668
669#trait = {
670  indexing_maps = #accesses,
671  iterator_types = ["parallel", "parallel"],
672  library_call = "some_external_fn"
673}
674
675func @broadcast_scalar(%arg0 : memref<1x1xf32>, %shape : memref<?x?xf32>) -> memref<?x?xf32>
676{
677   linalg.generic #trait
678     ins(%arg0 : memref<1x1xf32>)
679    outs(%shape : memref<?x?xf32>) {
680      ^bb0(%arg2 : f32, %arg3 : f32):
681        linalg.yield %arg2 : f32
682   }
683   return %shape : memref<?x?xf32>
684}
685//   CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> ()>
686//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
687// CHECK-LABEL: func @broadcast_scalar
688//  CHECK-SAME:   %[[ARG0:.*]]: memref<1x1xf32>
689//       CHECK:   %[[A:.*]] = memref.collapse_shape %[[ARG0]] []
690//  CHECK-SAME:     memref<1x1xf32> into memref<f32>
691//       CHECK:   linalg.generic
692//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
693//  CHECK-SAME:     iterator_types = ["parallel", "parallel"]
694//  CHECK-SAME:     %[[A]]
695
696// -----
697
698#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
699#map1 = affine_map<(d0, d1, d2) -> (d2)>
700func @fold_unit_dim_memref_reshape_op(%arg0 : memref<5xf32>) -> memref<2x5xf32>
701{
702  %1 = memref.alloc() : memref<1x2x5xf32>
703  linalg.generic {i64, indexing_maps = [#map1, #map0],
704    iterator_types = ["parallel", "parallel", "parallel"]}
705    ins(%arg0 : memref<5xf32>) outs(%1 : memref<1x2x5xf32>) {
706    ^bb0(%arg1: f32, %arg2: f32):  // no predecessors
707      linalg.yield %arg1 : f32
708    }
709  %3 = memref.collapse_shape %1 [[0, 1], [2]]
710    : memref<1x2x5xf32> into memref<2x5xf32>
711  return %3 : memref<2x5xf32>
712}
713// CHECK-LABEL: func @fold_unit_dim_memref_reshape_op
714//       CHECK:   %[[ALLOC:.*]] = memref.alloc() : memref<1x2x5xf32>
715//       CHECK:   %[[OUT:.*]] = memref.collapse_shape %[[ALLOC]]
716//       CHECK:   linalg.generic
717//       CHECK-SAME:   outs(%[[OUT:.*]] :
718//       CHECK:   %[[RESULT:.*]] = memref.collapse_shape %[[ALLOC]]
719//       CHECK:   return %[[RESULT]]
720
721// -----
722
723func @fold_unit_dim_for_init_memref(%input: memref<1x1000xf32>) -> memref<1xf32> {
724  %cst = constant 0.0 : f32
725  %init = memref.alloc() : memref<1xf32>
726  linalg.generic {
727      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
728      iterator_types = ["parallel", "reduction"]}
729    ins(%input : memref<1x1000xf32>)outs(%init : memref<1xf32>) {
730  ^bb0(%arg1: f32, %arg2: f32):
731    %1823 = addf %arg1, %arg2 : f32
732    linalg.yield %1823 : f32
733  }
734  return %init : memref<1xf32>
735}
736
737
738//   CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0)>
739//   CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ()>
740
741//       CHECK: func @fold_unit_dim_for_init_memref
742//       CHECK: %[[INIT:.+]] = memref.alloc() : memref<1xf32>
743//       CHECK: %[[INPUT_RESHAPE:.+]] = memref.collapse_shape %{{.+}} {{\[}}[0, 1]] : memref<1x1000xf32> into memref<1000xf32>
744//       CHECK: %[[INIT_RESHAPE:.+]] = memref.collapse_shape %[[INIT]] [] : memref<1xf32> into memref<f32>
745//       CHECK: linalg.generic
746//  CHECK-SAME:     indexing_maps = [#[[MAP1]], #[[MAP2]]]
747//  CHECK-SAME:     iterator_types = ["reduction"]
748//  CHECK-SAME:   ins(%[[INPUT_RESHAPE]] : memref<1000xf32>)
749//  CHECK-SAME:   outs(%[[INIT_RESHAPE]] : memref<f32>)
750//       CHECK: return %[[INIT:.+]] : memref<1xf32>
751
752
753
754