// RUN: mlir-opt %s -split-input-file -linalg-fold-unit-extent-dims | FileCheck %s

// Tests for the -linalg-fold-unit-extent-dims pass: unit-extent dimensions of
// linalg.generic operands (tensor and memref variants) are dropped, with
// collapse/expand reshapes inserted around the op where needed.

#accesses = [
  affine_map<(i, j, k, l, m) -> (i, k, m)>,
  affine_map<(i, j, k, l, m) -> ()>,
  affine_map<(i, j, k, l, m) -> (i, k, j, l, m)>
]

#trait = {
  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
  indexing_maps = #accesses,
  library_call = "some_external_func"
}

func @drop_one_trip_loops(%arg0 : tensor<?x1x?xf32>, %arg1 : f32, %shape: tensor<?x1x?x1x?xf32>) -> tensor<?x1x?x1x?xf32> {
  %0 = linalg.generic #trait
    ins(%arg0, %arg1 : tensor<?x1x?xf32>, f32)
    outs(%shape : tensor<?x1x?x1x?xf32>) {
      ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) :
        linalg.yield %arg3 : f32
      } -> tensor<?x1x?x1x?xf32>
  return %0 : tensor<?x1x?x1x?xf32>
}
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2) -> ()>
// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-LABEL: func @drop_one_trip_loops
// CHECK: linalg.tensor_collapse_shape %{{.*}} {{\[}}[0, 1], [2]]
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP3]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]
// CHECK: linalg.tensor_expand_shape %{{.*}} {{\[}}[0, 1], [2, 3], [4]]

// -----

#accesses = [
  affine_map<(i, j, k, l, m) -> (i, k, m)>,
  affine_map<(i, j, k, l, m) -> (i, k, j, l, m)>
]

#trait = {
  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
  indexing_maps = #accesses,
  library_call = "some_external_func"
}

func @drop_one_trip_loops_indexed
  (%arg0 : tensor<?x1x?xi32>, %shape: tensor<?x1x?x1x?xi32>) -> tensor<?x1x?x1x?xi32>
{
  %0 = linalg.generic #trait
    ins(%arg0 : tensor<?x1x?xi32>)
    outs(%shape: tensor<?x1x?x1x?xi32>) {
      ^bb0(%arg6 : i32, %arg7 : i32) :
        %idx0 = linalg.index 0 : index
        %idx1 = linalg.index 1 : index
        %idx2 = linalg.index 2 : index
        %idx3 = linalg.index 3 : index
        %idx4 = linalg.index 4 : index
        %1 = addi %idx0, %idx1 : index
        %2 = subi %1, %idx2 : index
        %3 = subi %2, %idx3 : index
        %4 = addi %3, %idx4 : index
        %5 = index_cast %4 : index to i32
        %6 = addi %5, %arg6 : i32
        linalg.yield %6 : i32
      } -> tensor<?x1x?x1x?xi32>
  return %0 : tensor<?x1x?x1x?xi32>
}
// The subtractions disappear since the access map of the output tensor maps
// its unit dimensions 1 and 3 to the index dimensions 2 and 3.
// CHECK-LABEL: func @drop_one_trip_loops_indexed
// CHECK: linalg.generic
// CHECK: ^{{.+}}(
// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: i32, %{{.*}}: i32)
// CHECK: %[[IDX0:.+]] = linalg.index 0 : index
// CHECK: %[[IDX1:.+]] = linalg.index 1 : index
// CHECK: %[[IDX2:.+]] = linalg.index 2 : index
// CHECK: %[[T3:.+]] = addi %[[IDX0]], %[[IDX1]]
// CHECK: %[[T4:.+]] = addi %[[T3]], %[[IDX2]]
// CHECK: %[[T5:.+]] = index_cast %[[T4]] : index to i32
// CHECK: %[[T6:.+]] = addi %[[T5]], %[[ARG4]] : i32
// CHECK: linalg.yield %[[T6]] : i32

// -----

#map0 = affine_map<(i, j) -> (i, j)>
#access = [#map0, #map0]
#trait = {
  iterator_types = ["parallel", "parallel"],
  indexing_maps = #access,
  library_call = "some_external_func"
}

func @drop_all_loops(%arg0 : tensor<1x1xf32>) -> tensor<1x1xf32>
{
  %0 = linalg.generic #trait
    ins(%arg0 : tensor<1x1xf32>)
    outs(%arg0 : tensor<1x1xf32>) {
      ^bb0(%arg1: f32, %arg2: f32) :
        linalg.yield %arg1 : f32
      } -> tensor<1x1xf32>
  return %0 : tensor<1x1xf32>
}
// CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
// CHECK-LABEL: func @drop_all_loops
// CHECK: linalg.tensor_collapse_shape %{{.*}} []
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP0]]]
// CHECK-SAME: iterator_types = []

// -----

#map0 = affine_map<(i, j) -> (i, j)>
#access = [#map0, #map0]
#trait = {
  iterator_types = ["parallel", "parallel"],
  indexing_maps = #access,
  library_call = "some_external_func"
}

func @drop_all_loops_indexed
  (%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32>{
  %0 = linalg.generic #trait
    ins(%arg0 : tensor<1x1xi32>)
    outs(%arg0 : tensor<1x1xi32>) {
      ^bb0(%arg3: i32, %arg4: i32) :
        %idx0 = linalg.index 0 : index
        %idx1 = linalg.index 1 : index
        %1 = addi %idx0, %idx1 : index
        %2 = index_cast %1 : index to i32
        %3 = addi %2, %arg3 : i32
        linalg.yield %3 : i32
      } -> tensor<1x1xi32>
  return %0 : tensor<1x1xi32>
}

// CHECK-LABEL: func @drop_all_loops_indexed
// CHECK: linalg.generic
// CHECK: ^{{.+}}(%[[ARG1:.+]]: i32, %[[ARG2:.+]]: i32)
// CHECK: linalg.yield %[[ARG1]] : i32

// -----

#accesses = [
  affine_map<(d0) -> (0, d0)>,
  affine_map<(d0) -> (d0)>
]

#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel"],
  library_call = "some_external_fn"
}

func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> {
  %0 = linalg.generic #trait
    ins(%arg0 : tensor<1x5xf32>)
    outs(%shape : tensor<5xf32>) {
      ^bb0(%arg2: f32, %arg3: f32): // no predecessors
        linalg.yield %arg2 : f32
      } -> tensor<5xf32>
  return %0 : tensor<5xf32>
}
// CHECK: #[[$MAP1:.*]] = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @leading_dim_1_canonicalization
// CHECK: linalg.tensor_collapse_shape %{{.*}} {{\[}}[0, 1]]
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP1]], #[[$MAP1]]]
// CHECK-SAME: iterator_types = ["parallel"]

// -----

#accesses = [
  affine_map<(d0, d1) -> (0, d1)>,
  affine_map<(d0, d1) -> (d0, 0)>,
  affine_map<(d0, d1) -> (d0, d1)>
]

#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel", "parallel"],
  library_call = "some_external_fn"
}

func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>, %shape : tensor<5x5xf32>) -> tensor<5x5xf32>
{
  %0 = linalg.tensor_expand_shape %arg0 [[0, 1]] : tensor<5xf32> into tensor<1x5xf32>
  %1 = linalg.tensor_expand_shape %arg1 [[0, 1]] : tensor<5xf32> into tensor<5x1xf32>
  %2 = linalg.generic #trait
    ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>)
    outs(%shape : tensor<5x5xf32>) {
      ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
        %3 = addf %arg3, %arg4 : f32
        linalg.yield %3 : f32
      } -> tensor<5x5xf32>
  return %2 : tensor<5x5xf32>
}
// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: func @broadcast_test
// CHECK-NOT: linalg.tensor_{{.*}}shape
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel"]
// CHECK-NOT: linalg.tensor_{{.*}}shape

// -----

#accesses = [
  affine_map<(d0, d1) -> (0, 0)>,
  affine_map<(d0, d1) -> (d0, d1)>
]

#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel", "parallel"],
  library_call = "some_external_fn"
}

func @broadcast_scalar(%arg0 : tensor<1x1xf32>, %shape : tensor<?x?xf32>) -> tensor<?x?xf32>
{
  %0 = linalg.generic #trait
    ins(%arg0 : tensor<1x1xf32>)
    outs(%shape : tensor<?x?xf32>) {
      ^bb0(%arg2 : f32, %arg3 : f32):
        linalg.yield %arg2 : f32
      } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}
// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> ()>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: func @broadcast_scalar
// CHECK-SAME: %[[ARG0:.*]]: tensor<1x1xf32>
// CHECK: %[[A:.*]] = linalg.tensor_collapse_shape %[[ARG0]] []
// CHECK-SAME: tensor<1x1xf32> into tensor<f32>
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel"]
// CHECK-SAME: %[[A]]

// -----

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2)>
func @fold_unit_dim_tensor_reshape_op(%arg0 : tensor<5xf32>) -> tensor<2x5xf32>
{
  %1 = linalg.init_tensor [1, 2, 5] : tensor<1x2x5xf32>
  // NOTE(review): `i64` parses as a unit attribute named "i64" on the op;
  // looks unintentional but is harmless — confirm against upstream.
  %2 = linalg.generic {i64, indexing_maps = [#map1, #map0],
    iterator_types = ["parallel", "parallel", "parallel"]}
    ins(%arg0 : tensor<5xf32>) outs(%1 : tensor<1x2x5xf32>) {
      ^bb0(%arg1: f32, %arg2: f32): // no predecessors
        linalg.yield %arg1 : f32
      } -> tensor<1x2x5xf32>
  %3 = linalg.tensor_collapse_shape %2 [[0, 1], [2]]
    : tensor<1x2x5xf32> into tensor<2x5xf32>
  return %3 : tensor<2x5xf32>
}
// CHECK-LABEL: func @fold_unit_dim_tensor_reshape_op
// CHECK: %[[RESULT:.+]] = linalg.generic
// CHECK: return %[[RESULT]]

// -----

func @fold_unit_dim_for_init_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> {
  %cst = constant 0.0 : f32
  %init = linalg.init_tensor [1] : tensor<1xf32>
  %fill = linalg.fill(%cst, %init) : f32, tensor<1xf32> -> tensor<1xf32>
  %add = linalg.generic {
    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
    iterator_types = ["parallel", "reduction"]}
    ins(%input : tensor<1x1000xf32>)outs(%fill : tensor<1xf32>) {
      ^bb0(%arg1: f32, %arg2: f32):
        %1823 = addf %arg1, %arg2 : f32
        linalg.yield %1823 : f32
      } -> tensor<1xf32>
  return %add : tensor<1xf32>
}


// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ()>

// CHECK: func @fold_unit_dim_for_init_tensor


// CHECK: %[[INPUT_RESHAPE:.+]] = linalg.tensor_collapse_shape %{{.+}} {{\[}}[0, 1]] : tensor<1x1000xf32> into tensor<1000xf32>
// CHECK: %[[INIT:.+]] = linalg.init_tensor [] : tensor<f32>
// CHECK: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor<f32> -> tensor<f32>
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]]
// CHECK-SAME: iterator_types = ["reduction"]
// CHECK-SAME: ins(%[[INPUT_RESHAPE]] : tensor<1000xf32>)
// CHECK-SAME: outs(%[[FILL]] : tensor<f32>)
// CHECK: %[[GENERIC_RESHAPE:.+]] = linalg.tensor_expand_shape %[[GENERIC]] [] : tensor<f32> into tensor<1xf32>
// CHECK: return %[[GENERIC_RESHAPE:.+]] : tensor<1xf32>


// -----

func @fold_slice(
    %arg0 : tensor<1x?x?x1x?x1x1xf32>, %arg1 : tensor<1x?x?x?x?x1x1xf32>,
    %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index,
    %arg6 : index, %arg7 : index) -> (tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>) {
  %0 = tensor.extract_slice %arg0[0, %arg2, %arg3, 0, %arg4, 0, 0]
      [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
      tensor<1x?x?x1x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32>
  %1 = tensor.extract_slice %arg1[%arg2, 0, %arg3, 0, 0, %arg4, 0]
      [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
      tensor<1x?x?x?x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32>
  return %0, %1 : tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>
}
// CHECK: func @fold_slice
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x?x1x?x1x1xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<1x?x?x?x?x1x1xf32>
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG0]]
// CHECK-SAME: to tensor<?x?x?xf32>
// CHECK: %[[RESULT1:.+]] = linalg.tensor_expand_shape %[[SLICE1]]
// CHECK-SAME: [0, 1], [2], [3, 4, 5, 6]
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[ARG1]]
// CHECK-SAME: to tensor<?x?x?xf32>
// CHECK: %[[RESULT2:.+]] = linalg.tensor_expand_shape %[[SLICE2]]
// CHECK-SAME: [0, 1], [2], [3, 4, 5, 6]
// CHECK: return %[[RESULT1]], %[[RESULT2]]

// -----

func @unit_dim_for_reduction(%arg0: tensor<1x?x1x?xf32>) -> tensor<1x?xf32> {
  %cst = constant 1.000000e+00 : f32
  %c3 = constant 3 : index
  %0 = tensor.dim %arg0, %c3 : tensor<1x?x1x?xf32>
  %1 = linalg.init_tensor [1, %0] : tensor<1x?xf32>
  %2 = linalg.fill(%cst, %1) : f32, tensor<1x?xf32> -> tensor<1x?xf32>
  %3 = linalg.generic {
    indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
                     affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
    ins(%arg0 : tensor<1x?x1x?xf32>)
    outs(%2 : tensor<1x?xf32>) {
      ^bb0(%arg1: f32, %arg2: f32): // no predecessors
        %4 = addf %arg1, %arg2 : f32
        linalg.yield %4 : f32
      } -> tensor<1x?xf32>
  return %3 : tensor<1x?xf32>
}
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d0)>
// CHECK: func @unit_dim_for_reduction
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x?xf32>
// CHECK-DAG: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]]
// CHECK: %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xf32>
// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
// CHECK: %[[RESULT:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]]
// CHECK-SAME: iterator_types = ["parallel", "reduction"]
// CHECK-SAME: ins(%[[RESHAPE]] : tensor<?x?xf32>)
// CHECK-SAME: outs(%[[FILL]] : tensor<?xf32>)
// CHECK: %[[RESULT_RESHAPE:.+]] = linalg.tensor_expand_shape %[[RESULT]] {{\[}}[0, 1]]
// CHECK: return %[[RESULT_RESHAPE]]

// -----

func @unit_dim_for_reduction_keep_one(%arg0: tensor<1x?x1x1xf32>) -> tensor<1x1xf32> {
  %cst = constant 1.000000e+00 : f32
  %c3 = constant 3 : index
  %1 = linalg.init_tensor [1, 1] : tensor<1x1xf32>
  %2 = linalg.fill(%cst, %1) : f32, tensor<1x1xf32> -> tensor<1x1xf32>
  %3 = linalg.generic {
    indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
                     affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
    ins(%arg0 : tensor<1x?x1x1xf32>)
    outs(%2 : tensor<1x1xf32>) {
      ^bb0(%arg1: f32, %arg2: f32): // no predecessors
        %4 = addf %arg1, %arg2 : f32
        linalg.yield %4 : f32
      } -> tensor<1x1xf32>
  return %3 : tensor<1x1xf32>
}
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d0)>
// CHECK: func @unit_dim_for_reduction_keep_one
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x1xf32>
// CHECK-DAG: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]]
// CHECK: %[[INIT:.+]] = linalg.init_tensor [1] : tensor<1xf32>
// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
// CHECK: %[[RESULT:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]]
// CHECK-SAME: iterator_types = ["parallel", "reduction"]
// CHECK-SAME: ins(%[[RESHAPE]] : tensor<?x1xf32>)
// CHECK-SAME: outs(%[[FILL]] : tensor<1xf32>)
// CHECK: %[[RESULT_RESHAPE:.+]] = linalg.tensor_expand_shape %[[RESULT]] {{\[}}[0, 1]]
// CHECK: return %[[RESULT_RESHAPE]]

// -----

func @unit_dim_for_reduction_inner(%arg0: tensor<?x1x?x1xf32>) -> tensor<?x1xf32> {
  %cst = constant 1.000000e+00 : f32
  %c2 = constant 2 : index
  %0 = tensor.dim %arg0, %c2 : tensor<?x1x?x1xf32>
  %1 = linalg.init_tensor [%0, 1] : tensor<?x1xf32>
  %2 = linalg.fill(%cst, %1) : f32, tensor<?x1xf32> -> tensor<?x1xf32>
  %3 = linalg.generic {
    indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
                     affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
    ins(%arg0 : tensor<?x1x?x1xf32>)
    outs(%2 : tensor<?x1xf32>) {
      ^bb0(%arg1: f32, %arg2: f32): // no predecessors
        %4 = addf %arg1, %arg2 : f32
        linalg.yield %4 : f32
      } -> tensor<?x1xf32>
  return %3 : tensor<?x1xf32>
}
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d0)>
// CHECK: func @unit_dim_for_reduction_inner
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x1x?x1xf32>
// CHECK-DAG: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]] {{\[}}[0, 1], [2, 3]]
// CHECK: %[[INIT:.+]] = linalg.init_tensor [%{{.+}}] : tensor<?xf32>
// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
// CHECK: %[[RESULT:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]]
// CHECK-SAME: iterator_types = ["parallel", "reduction"]
// CHECK-SAME: ins(%[[RESHAPE]] : tensor<?x?xf32>)
// CHECK-SAME: outs(%[[FILL]] : tensor<?xf32>)
// CHECK: %[[RESULT_RESHAPE:.+]] = linalg.tensor_expand_shape %[[RESULT]] {{\[}}[0, 1]]
// CHECK: return %[[RESULT_RESHAPE]]

// -----

func @slice_unit_dims(%arg0: tensor<1x3xf32>) -> tensor<1x1xf32> {
  %0 = tensor.extract_slice %arg0[0, 2] [1, 1] [1, 1] : tensor<1x3xf32> to tensor<1x1xf32>
  return %0 : tensor<1x1xf32>
}
// CHECK-LABEL: func @slice_unit_dims
// CHECK: %[[SLICE:.+]] = tensor.extract_slice
// CHECK-SAME: tensor<1x3xf32> to tensor<f32>
// CHECK: %[[RESULT:.+]] = linalg.tensor_expand_shape %[[SLICE]] []
// CHECK: return %[[RESULT]]

// -----

func @insert_slice_unit_dims(%arg0: tensor<1x3xf32>, %arg1: tensor<1x1xf32>) -> tensor<1x3xf32> {
  %0 = tensor.insert_slice %arg1 into %arg0[0, 2] [1, 1] [1, 1] : tensor<1x1xf32> into tensor<1x3xf32>
  return %0 : tensor<1x3xf32>
}
// CHECK-LABEL: func @insert_slice_unit_dims
// CHECK: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %{{.+}} []
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[RESHAPE]]
// CHECK-SAME: tensor<f32> into tensor<1x3xf32>
// CHECK: return %[[RESULT]]

// -----

#accesses = [
  affine_map<(i, j, k, l, m) -> (i, k, m)>,
  affine_map<(i, j, k, l, m) -> ()>,
  affine_map<(i, j, k, l, m) -> (i, k, j, l, m)>
]

#trait = {
  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
  indexing_maps = #accesses,
  library_call = "some_external_func"
}

func @drop_one_trip_loops(%arg0 : memref<?x1x?xf32>, %arg1 : f32, %shape: memref<?x1x?x1x?xf32>) -> memref<?x1x?x1x?xf32> {
  linalg.generic #trait
    ins(%arg0, %arg1 : memref<?x1x?xf32>, f32)
    outs(%shape : memref<?x1x?x1x?xf32>) {
      ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) :
        linalg.yield %arg3 : f32
      }
  return %shape : memref<?x1x?x1x?xf32>
}
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2) -> ()>
// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-LABEL: func @drop_one_trip_loops
// CHECK: memref.collapse_shape %{{.*}} {{\[}}[0, 1], [2]]
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP3]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]

// -----

#accesses = [
  affine_map<(i, j, k, l, m) -> (i, k, m)>,
  affine_map<(i, j, k, l, m) -> (i, k, j, l, m)>
]

#trait = {
  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
  indexing_maps = #accesses,
  library_call = "some_external_func"
}

func @drop_one_trip_loops_indexed
  (%arg0 : memref<?x1x?xi32>, %shape: memref<?x1x?x1x?xi32>) -> memref<?x1x?x1x?xi32>
{
  linalg.generic #trait
    ins(%arg0 : memref<?x1x?xi32>)
    outs(%shape: memref<?x1x?x1x?xi32>) {
      ^bb0(%arg6 : i32, %arg7 : i32) :
        %idx0 = linalg.index 0 : index
        %idx1 = linalg.index 1 : index
        %idx2 = linalg.index 2 : index
        %idx3 = linalg.index 3 : index
        %idx4 = linalg.index 4 : index
        %1 = addi %idx0, %idx1 : index
        %2 = subi %1, %idx2 : index
        %3 = subi %2, %idx3 : index
        %4 = addi %3, %idx4 : index
        %5 = index_cast %4 : index to i32
        %6 = addi %5, %arg6 : i32
        linalg.yield %6 : i32
      }
  return %shape : memref<?x1x?x1x?xi32>
}
// The subtractions disappear since the access map of the output memref maps
// its unit dimensions 1 and 3 to the index dimensions 2 and 3.
// CHECK-LABEL: func @drop_one_trip_loops_indexed
// CHECK: linalg.generic
// CHECK: ^{{.+}}(
// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: i32, %{{.*}}: i32)
// CHECK: %[[IDX0:.+]] = linalg.index 0 : index
// CHECK: %[[IDX1:.+]] = linalg.index 1 : index
// CHECK: %[[IDX2:.+]] = linalg.index 2 : index
// CHECK: %[[T3:.+]] = addi %[[IDX0]], %[[IDX1]]
// CHECK: %[[T4:.+]] = addi %[[T3]], %[[IDX2]]
// CHECK: %[[T5:.+]] = index_cast %[[T4]] : index to i32
// CHECK: %[[T6:.+]] = addi %[[T5]], %[[ARG4]] : i32
// CHECK: linalg.yield %[[T6]] : i32

// -----

#map0 = affine_map<(i, j) -> (i, j)>
#access = [#map0, #map0]
#trait = {
  iterator_types = ["parallel", "parallel"],
  indexing_maps = #access,
  library_call = "some_external_func"
}

func @drop_all_loops(%arg0 : memref<1x1xf32>) -> memref<1x1xf32>
{
  linalg.generic #trait
    ins(%arg0 : memref<1x1xf32>)
    outs(%arg0 : memref<1x1xf32>) {
      ^bb0(%arg1: f32, %arg2: f32) :
        linalg.yield %arg1 : f32
      }
  return %arg0 : memref<1x1xf32>
}
// CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
// CHECK-LABEL: func @drop_all_loops
// CHECK: memref.collapse_shape %{{.*}} []
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP0]]]
// CHECK-SAME: iterator_types = []

// -----

#map0 = affine_map<(i, j) -> (i, j)>
#access = [#map0, #map0]
#trait = {
  iterator_types = ["parallel", "parallel"],
  indexing_maps = #access,
  library_call = "some_external_func"
}

func @drop_all_loops_indexed
  (%arg0 : memref<1x1xi32>) -> memref<1x1xi32>{
  linalg.generic #trait
    ins(%arg0 : memref<1x1xi32>)
    outs(%arg0 : memref<1x1xi32>) {
      ^bb0(%arg3: i32, %arg4: i32) :
        %idx0 = linalg.index 0 : index
        %idx1 = linalg.index 1 : index
        %1 = addi %idx0, %idx1 : index
        %2 = index_cast %1 : index to i32
        %3 = addi %2, %arg3 : i32
        linalg.yield %3 : i32
      }
  return %arg0 : memref<1x1xi32>
}

// CHECK-LABEL: func @drop_all_loops_indexed
// CHECK: linalg.generic
// CHECK: ^{{.+}}(%[[ARG1:.+]]: i32, %[[ARG2:.+]]: i32)
// CHECK: linalg.yield %[[ARG1]] : i32

// -----

#accesses = [
  affine_map<(d0) -> (0, d0)>,
  affine_map<(d0) -> (d0)>
]

#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel"],
  library_call = "some_external_fn"
}

func @leading_dim_1_canonicalization(%arg0: memref<1x5xf32>, %shape: memref<5xf32>) -> memref<5xf32> {
  linalg.generic #trait
    ins(%arg0 : memref<1x5xf32>)
    outs(%shape : memref<5xf32>) {
      ^bb0(%arg2: f32, %arg3: f32): // no predecessors
        linalg.yield %arg2 : f32
      }
  return %shape : memref<5xf32>
}
// CHECK: #[[$MAP1:.*]] = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @leading_dim_1_canonicalization
// CHECK: memref.collapse_shape %{{.*}} {{\[}}[0, 1]]
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP1]], #[[$MAP1]]]
// CHECK-SAME: iterator_types = ["parallel"]

// -----

#accesses = [
  affine_map<(d0, d1) -> (0, d1)>,
  affine_map<(d0, d1) -> (d0, 0)>,
  affine_map<(d0, d1) -> (d0, d1)>
]

#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel", "parallel"],
  library_call = "some_external_fn"
}

func @broadcast_test(%arg0 : memref<5xf32>, %arg1 : memref<5xf32>, %shape : memref<5x5xf32>) -> memref<5x5xf32>
{
  %0 = memref.expand_shape %arg0 [[0, 1]] : memref<5xf32> into memref<1x5xf32>
  %1 = memref.expand_shape %arg1 [[0, 1]] : memref<5xf32> into memref<5x1xf32>
  linalg.generic #trait
    ins(%0, %1 : memref<1x5xf32>, memref<5x1xf32>)
    outs(%shape : memref<5x5xf32>) {
      ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
        %3 = addf %arg3, %arg4 : f32
        linalg.yield %3 : f32
      }
  return %shape : memref<5x5xf32>
}
// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: func @broadcast_test
// CHECK-NOT: linalg.memref_{{.*}}shape
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel"]
// CHECK-NOT: linalg.memref_{{.*}}shape

// -----

#accesses = [
  affine_map<(d0, d1) -> (0, 0)>,
  affine_map<(d0, d1) -> (d0, d1)>
]

#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel", "parallel"],
  library_call = "some_external_fn"
}

func @broadcast_scalar(%arg0 : memref<1x1xf32>, %shape : memref<?x?xf32>) -> memref<?x?xf32>
{
  linalg.generic #trait
    ins(%arg0 : memref<1x1xf32>)
    outs(%shape : memref<?x?xf32>) {
      ^bb0(%arg2 : f32, %arg3 : f32):
        linalg.yield %arg2 : f32
      }
  return %shape : memref<?x?xf32>
}
// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> ()>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: func @broadcast_scalar
// CHECK-SAME: %[[ARG0:.*]]: memref<1x1xf32>
// CHECK: %[[A:.*]] = memref.collapse_shape %[[ARG0]] []
// CHECK-SAME: memref<1x1xf32> into memref<f32>
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel"]
// CHECK-SAME: %[[A]]

// -----

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2)>
func @fold_unit_dim_memref_reshape_op(%arg0 : memref<5xf32>) -> memref<2x5xf32>
{
  %1 = memref.alloc() : memref<1x2x5xf32>
  // NOTE(review): `i64` parses as a unit attribute named "i64" on the op;
  // looks unintentional but is harmless — confirm against upstream.
  linalg.generic {i64, indexing_maps = [#map1, #map0],
    iterator_types = ["parallel", "parallel", "parallel"]}
    ins(%arg0 : memref<5xf32>) outs(%1 : memref<1x2x5xf32>) {
      ^bb0(%arg1: f32, %arg2: f32): // no predecessors
        linalg.yield %arg1 : f32
      }
  %3 = memref.collapse_shape %1 [[0, 1], [2]]
    : memref<1x2x5xf32> into memref<2x5xf32>
  return %3 : memref<2x5xf32>
}
// CHECK-LABEL: func @fold_unit_dim_memref_reshape_op
// CHECK: %[[ALLOC:.*]] = memref.alloc() : memref<1x2x5xf32>
// CHECK: %[[OUT:.*]] = memref.collapse_shape %[[ALLOC]]
// CHECK: linalg.generic
// CHECK-SAME: outs(%[[OUT:.*]] :
// CHECK: %[[RESULT:.*]] = memref.collapse_shape %[[ALLOC]]
// CHECK: return %[[RESULT]]

// -----

func @fold_unit_dim_for_init_memref(%input: memref<1x1000xf32>) -> memref<1xf32> {
  %cst = constant 0.0 : f32
  %init = memref.alloc() : memref<1xf32>
  linalg.generic {
    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
    iterator_types = ["parallel", "reduction"]}
    ins(%input : memref<1x1000xf32>)outs(%init : memref<1xf32>) {
      ^bb0(%arg1: f32, %arg2: f32):
        %1823 = addf %arg1, %arg2 : f32
        linalg.yield %1823 : f32
      }
  return %init : memref<1xf32>
}


// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ()>

// CHECK: func @fold_unit_dim_for_init_memref
// CHECK: %[[INIT:.+]] = memref.alloc() : memref<1xf32>
// CHECK: %[[INPUT_RESHAPE:.+]] = memref.collapse_shape %{{.+}} {{\[}}[0, 1]] : memref<1x1000xf32> into memref<1000xf32>
// CHECK: %[[INIT_RESHAPE:.+]] = memref.collapse_shape %[[INIT]] [] : memref<1xf32> into memref<f32>
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]]
// CHECK-SAME: iterator_types = ["reduction"]
// CHECK-SAME: ins(%[[INPUT_RESHAPE]] : memref<1000xf32>)
// CHECK-SAME: outs(%[[INIT_RESHAPE]] : memref<f32>)
// CHECK: return %[[INIT:.+]] : memref<1xf32>