1// RUN: mlir-opt %s -test-linalg-greedy-fusion -split-input-file | FileCheck %s 2 3func @matmul_tensors(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> { 4 %t0 = linalg.matmul ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>) 5 outs(%arg2: tensor<?x?xf32>) 6 -> tensor<?x?xf32> 7 8 %c4 = constant 4 : index 9 %c2 = constant 2 : index 10 %c0 = constant 0 : index 11 %c3 = constant 3 : index 12 %c1 = constant 1 : index 13 %0 = tensor.dim %t0, %c0 : tensor<?x?xf32> 14 %1 = tensor.dim %t0, %c1 : tensor<?x?xf32> 15 %2 = tensor.dim %arg1, %c1 : tensor<?x?xf32> 16 %3 = scf.for %arg3 = %c0 to %0 step %c2 iter_args(%arg4 = %arg2) -> (tensor<?x?xf32>) { 17 %4 = scf.for %arg5 = %c0 to %2 step %c3 iter_args(%arg6 = %arg4) -> (tensor<?x?xf32>) { 18 %5 = scf.for %arg7 = %c0 to %1 step %c4 iter_args(%arg8 = %arg6) -> (tensor<?x?xf32>) { 19 %6 = tensor.extract_slice %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32> 20 %7 = tensor.extract_slice %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor<?x?xf32> to tensor<4x?xf32> 21 %8 = tensor.extract_slice %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor<?x?xf32> to tensor<?x?xf32> 22 %9 = linalg.matmul ins(%6, %7 : tensor<?x4xf32>, tensor<4x?xf32>) outs(%8 : tensor<?x?xf32>) -> tensor<?x?xf32> 23 %10 = tensor.insert_slice %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32> 24 scf.yield %10 : tensor<?x?xf32> 25 } 26 scf.yield %5 : tensor<?x?xf32> 27 } 28 scf.yield %4 : tensor<?x?xf32> 29 } 30 return %3 : tensor<?x?xf32> 31} 32 33// CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> 34// CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> 35 36// CHECK: func @matmul_tensors( 37// CHECK-SAME: %[[A:[0-9a-z]*]]: tensor<?x?xf32> 38// CHECK-SAME: %[[B:[0-9a-z]*]]: tensor<?x?xf32> 39// CHECK-SAME: %[[C:[0-9a-z]*]]: tensor<?x?xf32> 40 41// CHECK-DAG: %[[C0:.*]] = constant 0 : index 42// CHECK-DAG: %[[C1:.*]] = constant 1 : index 43// CHECK-DAG: %[[dA0:.*]] = tensor.dim %[[A]], %[[C0]] : tensor<?x?xf32> 44// CHECK-DAG: %[[dA1:.*]] = tensor.dim %[[A]], %[[C1]] : tensor<?x?xf32> 45// CHECK-DAG: %[[dB0:.*]] = tensor.dim %[[B]], %[[C0]] : tensor<?x?xf32> 46// CHECK-DAG: %[[dB1:.*]] = tensor.dim %[[B]], %[[C1]] : tensor<?x?xf32> 47// CHECK-DAG: %[[dC0:.*]] = tensor.dim %[[C]], %[[C0]] : tensor<?x?xf32> 48// CHECK-DAG: %[[dC1:.*]] = tensor.dim %[[C]], %[[C1]] : tensor<?x?xf32> 49// CHECK: scf.for %[[I:[0-9a-z]*]] 50// CHECK: %[[sizeA0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dA0]]] 51// CHECK: %[[stA:.*]] = tensor.extract_slice %[[A]][%[[I]], 0] [%[[sizeA0]], %[[dA1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> 52// CHECK: %[[sizeC0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dC0]]] 53// CHECK-NEXT: scf.for %[[J:[0-9a-z]*]] 54// CHECK-NEXT: scf.for %[[K:[0-9a-z]*]] {{.*}} iter_args(%[[RES:[0-9a-z]*]] 55// CHECK-DAG: %[[stB1:.*]] = tensor.extract_slice %[[B]][%[[K]], %[[J]]] [4, 3] [1, 1] : tensor<?x?xf32> to tensor<4x3xf32> 56// CHECK-DAG: %[[stF:.*]] = tensor.extract_slice %[[RES]][%[[I]], %[[J]]] [2, 3] [1, 1] : tensor<?x?xf32> to tensor<2x3xf32> 57// 58// slices of the producing matmul. 59// CHECK: %[[sizeB1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dB1]]] 60// CHECK: %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], %[[sizeB1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> 61// CHECK: %[[sizeC1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dC1]]] 62// CHECK: %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [%[[sizeC0]], %[[sizeC1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> 63// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[stC]] : tensor<?x?xf32>) -> tensor<?x?xf32> 64// CHECK: %[[CAST:.*]] = tensor.cast %[[stD]] : tensor<?x?xf32> to tensor<2x4xf32> 65// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[CAST]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32> 66// CHECK-NEXT: tensor.insert_slice %[[stG]] into %[[RES]][%[[I]], %[[J]]] 67 68// ----- 69 70func @conv_tensors_static(%input: tensor<1x225x225x3xf32>, %filter: tensor<3x3x3x32xf32>, %elementwise: tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> { 71 %c112 = constant 112 : index 72 %c32 = constant 32 : index 73 %c16 = constant 16 : index 74 %c8 = constant 8 : index 75 %c4 = constant 4 : index 76 %c0 = constant 0 : index 77 %cst = constant 0.0 : f32 78 79 %init = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32> 80 %fill = linalg.fill(%cst, %init) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32> 81 82 %conv = linalg.conv_2d_input_nhwc_filter_hwcf 83 {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} 84 ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) 85 outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> 86 87 %for0 = scf.for %iv0 = %c0 to %c112 step %c8 iter_args(%arg0 = %fill) -> tensor<1x112x112x32xf32> { 88 %for1 = scf.for %iv1 = %c0 to %c112 step %c16 iter_args(%arg1 = %arg0) -> tensor<1x112x112x32xf32> { 89 %for2 = scf.for %iv2 = %c0 to %c32 step %c4 iter_args(%arg2 = %arg1) -> tensor<1x112x112x32xf32> { 90 %0 = tensor.extract_slice %conv[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> 91 %1 = tensor.extract_slice %elementwise[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> 92 %2 = tensor.extract_slice %arg2[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> 93 %add = linalg.generic 94 { 95 indexing_maps = [ 96 affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, 97 affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, 98 affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], 99 iterator_types = ["parallel", "parallel", "parallel", "parallel"] 100 } 101 ins(%0, %1 : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) outs(%2 : tensor<1x8x16x4xf32>) { 102 ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): 103 %result = addf %arg3, %arg4 : f32 104 linalg.yield %result : f32 105 } -> tensor<1x8x16x4xf32> 106 107 %insert = tensor.insert_slice %add into %arg2[0, %iv0, %iv1, %iv2] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x8x16x4xf32> into tensor<1x112x112x32xf32> 108 scf.yield %insert : tensor<1x112x112x32xf32> 109 } 110 scf.yield %for2 : tensor<1x112x112x32xf32> 111 } 112 scf.yield %for1 : tensor<1x112x112x32xf32> 113 } 114 return %for0 : tensor<1x112x112x32xf32> 115} 116 117// CHECK: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 2)> 118// CHECK: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> 119 120// CHECK: func @conv_tensors_static 121// CHECK-SAME: (%[[INPUT:.+]]: tensor<1x225x225x3xf32>, %[[FILTER:.+]]: tensor<3x3x3x32xf32>, %[[ELEM:.+]]: tensor<1x112x112x32xf32>) 122 123// CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32> 124// CHECK-NEXT: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32> 125 126// CHECK-NEXT: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG0:.+]] = %[[FILL]]) 127// CHECK-NEXT: %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]]) 128// CHECK-NEXT: scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG1:.+]] = %[[ARG0]]) 129// CHECK-NEXT: %[[OFFSET_W:.+]] = affine.apply #[[MAP0]](%[[IV1]]) 130// CHECK-NEXT: %[[ST_INPUT:.+]] = tensor.extract_slice %arg0[0, %[[OFFSET_H]], %[[OFFSET_W]], 0] [1, 17, 33, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x17x33x3xf32> 131// CHECK-NEXT: scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG2:.+]] = %[[ARG1]]) 132// CHECK-NEXT: %[[ST_ELEM:.+]] = tensor.extract_slice %[[ELEM]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> 133// CHECK-NEXT: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> 134// CHECK-NEXT: %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, %[[IV2]]] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x4xf32> 135// CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> 136// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf 137// CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<1x17x33x3xf32>, tensor<3x3x3x4xf32>) 138// CHECK-SAME: outs(%[[ST_FILL]] : tensor<1x8x16x4xf32>) 139// CHECK-NEXT: %[[ADD:.+]] = linalg.generic 140// CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) 141// CHECK-SAME: outs(%[[ST_ARG2]] : tensor<1x8x16x4xf32>) 142// CHECK: tensor.insert_slice %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] 143 144// ----- 145 146func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?xf32>, %elementwise: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> { 147 %cst = constant 0.0 : f32 148 %c0 = constant 0 : index 149 %c1 = constant 1 : index 150 %c2 = constant 2 : index 151 %c3 = constant 3 : index 152 %c4 = constant 4 : index 153 %c8 = constant 8 : index 154 %c16 = constant 16 : index 155 156 %n = tensor.dim %elementwise, %c0 : tensor<?x?x?x?xf32> 157 %oh = tensor.dim %elementwise, %c1 : tensor<?x?x?x?xf32> 158 %ow = tensor.dim %elementwise, %c2 : tensor<?x?x?x?xf32> 159 %oc = tensor.dim %elementwise, %c3 : tensor<?x?x?x?xf32> 160 161 %init = linalg.init_tensor [%n, %oh, %ow, %oc] : tensor<?x?x?x?xf32> 162 %fill = linalg.fill(%cst, %init) : f32, tensor<?x?x?x?xf32> -> tensor<?x?x?x?xf32> 163 164 %conv = linalg.conv_2d_input_nhwc_filter_hwcf 165 {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} 166 ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) 167 outs(%fill : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> 168 169 %for0 = scf.for %iv0 = %c0 to %n step %c8 iter_args(%arg0 = %fill) -> tensor<?x?x?x?xf32> { 170 %for1 = scf.for %iv1 = %c0 to %oh step %c16 iter_args(%arg1 = %arg0) -> tensor<?x?x?x?xf32> { 171 %for2 = scf.for %iv2 = %c0 to %ow step %c4 iter_args(%arg2 = %arg1) -> tensor<?x?x?x?xf32> { 172 %for3 = scf.for %iv3 = %c0 to %oc step %c2 iter_args(%arg3 = %arg2) -> tensor<?x?x?x?xf32> { 173 %n_size = affine.min affine_map<(d0)[s0] -> (8, -d0 + s0)>(%iv0)[%n] 174 %oh_size = affine.min affine_map<(d0)[s0] -> (16, -d0 + s0)>(%iv1)[%oh] 175 %ow_size = affine.min affine_map<(d0)[s0] -> (4, -d0 + s0)>(%iv2)[%ow] 176 %oc_size = affine.min affine_map<(d0)[s0] -> (2, -d0 + s0)>(%iv2)[%oc] 177 %0 = tensor.extract_slice %conv[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32> 178 %1 = tensor.extract_slice %elementwise[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32> 179 %2 = tensor.extract_slice %arg3[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32> 180 %add = linalg.generic 181 { 182 indexing_maps = [ 183 affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, 184 affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, 185 affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], 186 iterator_types = ["parallel", "parallel", "parallel", "parallel"] 187 } 188 ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) outs(%2 : tensor<?x?x?x?xf32>) { 189 ^bb0(%arg4: f32, %arg5: f32, %arg6: f32): 190 %result = addf %arg4, %arg5 : f32 191 linalg.yield %result : f32 192 } -> tensor<?x?x?x?xf32> 193 194 %insert = tensor.insert_slice %add into %arg3[%iv0, %iv1, %iv2, %iv3] [%n_size, %oh_size, %ow_size, %oc_size] [1, 1, 1, 1] : tensor<?x?x?x?xf32> into tensor<?x?x?x?xf32> 195 scf.yield %insert : tensor<?x?x?x?xf32> 196 } 197 scf.yield %for3 : tensor<?x?x?x?xf32> 198 } 199 scf.yield %for2 : tensor<?x?x?x?xf32> 200 } 201 scf.yield %for1 : tensor<?x?x?x?xf32> 202 } 203 return %for0 : tensor<?x?x?x?xf32> 204} 205 206// CHECK: #[[BOUND8_MAP:.+]] = affine_map<(d0)[s0] -> (8, -d0 + s0)> 207// CHECK: #[[BOUND8_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 8, -d0 + s1)> 208// CHECK: #[[BOUND16_MAP:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)> 209// CHECK: #[[X2_MAP:.+]] = affine_map<(d0) -> (d0 * 2)> 210// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s1)> 211// CHECK: #[[BOUND16_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 16, -d0 + s1)> 212// CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> 213// CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> 214// CHECK: #[[BOUND4_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 4, -d0 + s1)> 215// CHECK: #[[BOUND2_MAP_2:.+]] = affine_map<(d0, d1)[s0, s1] -> (-d0 + s0, 2, -d1 + s1)> 216 217// CHECK: func @conv_tensors_dynamic 218// CHECK-SAME: (%[[INPUT]]: tensor<?x?x?x?xf32>, %[[FILTER]]: tensor<?x?x?x?xf32>, %[[ELEM]]: tensor<?x?x?x?xf32>) 219 220// CHECK-DAG: %[[C0:.+]] = constant 0 : index 221// CHECK-DAG: %[[C1:.+]] = constant 1 : index 222// CHECK-DAG: %[[C2:.+]] = constant 2 : index 223// CHECK-DAG: %[[C3:.+]] = constant 3 : index 224 225// CHECK-DAG: %[[ELEM_N:.+]] = tensor.dim %[[ELEM]], %[[C0]] : tensor<?x?x?x?xf32> 226// CHECK-DAG: %[[ELEM_OH:.+]] = tensor.dim %[[ELEM]], %[[C1]] : tensor<?x?x?x?xf32> 227// CHECK-DAG: %[[ELEM_OW:.+]] = tensor.dim %[[ELEM]], %[[C2]] : tensor<?x?x?x?xf32> 228// CHECK-DAG: %[[ELEM_OC:.+]] = tensor.dim %[[ELEM]], %[[C3]] : tensor<?x?x?x?xf32> 229 230// CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]] : tensor<?x?x?x?xf32> 231// CHECK: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor<?x?x?x?xf32> -> tensor<?x?x?x?xf32> 232 233// CHECK-DAG: %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor<?x?x?x?xf32> 234// CHECK-DAG: %[[FILTER_W:.+]] = tensor.dim %[[FILTER]], %[[C1]] : tensor<?x?x?x?xf32> 235// CHECK-DAG: %[[INPUT_N:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x?x?xf32> 236// CHECK-DAG: %[[INPUT_H:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor<?x?x?x?xf32> 237// CHECK-DAG: %[[INPUT_W:.+]] = tensor.dim %[[INPUT]], %[[C2]] : tensor<?x?x?x?xf32> 238// CHECK-DAG: %[[INPUT_C:.+]] = tensor.dim %[[INPUT]], %[[C3]] : tensor<?x?x?x?xf32> 239// CHECK-DAG: %[[FILTER_IC:.+]] = tensor.dim %[[FILTER]], %[[C2]] : tensor<?x?x?x?xf32> 240// CHECK-DAG: %[[FILTER_OC:.+]] = tensor.dim %[[FILTER]], %[[C3]] : tensor<?x?x?x?xf32> 241// CHECK-DAG: %[[FILL_N:.+]] = tensor.dim %[[FILL]], %[[C0]] : tensor<?x?x?x?xf32> 242// CHECK-DAG: %[[FILL_H:.+]] = tensor.dim %[[FILL]], %[[C1]] : tensor<?x?x?x?xf32> 243// CHECK-DAG: %[[FILL_W:.+]] = tensor.dim %[[FILL]], %[[C2]] : tensor<?x?x?x?xf32> 244// CHECK-DAG: %[[FILL_C:.+]] = tensor.dim %[[FILL]], %[[C3]] : tensor<?x?x?x?xf32> 245 246// CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %[[ELEM_N]] step %{{.+}} iter_args(%{{.+}} = %[[FILL]]) 247// CHECK-NEXT: %[[SIZE_ELEM_N:.+]] = affine.min #[[BOUND8_MAP]](%[[IV0]])[%[[ELEM_N]]] 248// CHECK-NEXT: %[[SIZE_INPUT_N:.+]] = affine.min #[[BOUND8_MAP_2]](%[[IV0]])[%[[INPUT_N]], %[[ELEM_N]]] 249// CHECK-NEXT: %[[SIZE_ELEM_N_2:.+]] = affine.min #[[BOUND8_MAP_2]](%[[IV0]])[%[[FILL_N]], %[[ELEM_N]]] 250// CHECK-NEXT: scf.for %[[IV1:.+]] = %{{.+}} to %[[ELEM_OH]] 251// CHECK-NEXT: %[[SIZE_ELEM_OH:.+]] = affine.min #[[BOUND16_MAP]](%[[IV1]])[%[[ELEM_OH]]] 252// CHECK-NEXT: %[[OFFSET_OH:.+]] = affine.apply #[[X2_MAP]](%[[IV1]]) 253// CHECK-NEXT: %[[SIZE_INPUT_H:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OH]], %[[IV1]])[%[[FILTER_H]], %[[INPUT_H]]] 254// CHECK-NEXT: %[[SIZE_ELEM_OH_2:.+]] = affine.min #[[BOUND16_MAP_2]](%[[IV1]])[%[[FILL_H]], %[[ELEM_OH]]] 255// CHECK-NEXT: scf.for %[[IV2:.+]] = %{{.+}} to %[[ELEM_OW]] 256// CHECK-NEXT: %[[SIZE_ELEM_OW:.+]] = affine.min #[[BOUND4_MAP]](%[[IV2]])[%[[ELEM_OW]]] 257// CHECK-NEXT: %[[SIZE_ELEM_OC:.+]] = affine.min #[[BOUND2_MAP]](%[[IV2]])[%[[ELEM_OC]]] 258// CHECK-NEXT: %[[OFFSET_OW:.+]] = affine.apply #[[X2_MAP]](%[[IV2]]) 259// CHECK-NEXT: %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OW]], %[[IV2]])[%[[FILTER_W]], %[[INPUT_W]]] 260// CHECK-NEXT: %[[ST_INPUT:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0] 261// CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_INPUT_H]], %[[SIZE_INPUT_W]], %[[INPUT_C]]] 262// CHECK-NEXT: %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]] 263// CHECK-NEXT: scf.for %[[IV3:.+]] = %{{.+}} to %[[ELEM_OC]] step %{{.+}} iter_args(%[[ARG:[a-z0-9]+]] 264// CHECK-NEXT: %[[ST_ELEM:.+]] = tensor.extract_slice %[[ELEM]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] 265// CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]] 266// CHECK-NEXT: %[[ST_ARG:.+]] = tensor.extract_slice %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] 267// CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]] 268// CHECK-NEXT: %[[SIZE_ELEM_OC_2:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILTER_OC]], %[[ELEM_OC]]] 269// CHECK-NEXT: %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, %[[IV3]]] 270// CHECK-SAME: [%[[FILTER_H]], %[[FILTER_W]], %[[FILTER_IC]], %[[SIZE_ELEM_OC_2]]] 271// CHECK-NEXT: %[[SIZE_ELEM_OC_3:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILL_C]], %[[ELEM_OC]]] 272// CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] 273// CHECK-SAME: [%[[SIZE_ELEM_N_2]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_3]]] 274// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf 275// CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) 276// CHECK-SAME: outs(%[[ST_FILL]] : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> 277// CHECK-NEXT: %[[ST_ADD:.+]] = linalg.generic 278// CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) 279// CHECK-SAME: outs(%[[ST_ARG]] : tensor<?x?x?x?xf32>) 280// CHECK: tensor.insert_slice %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] 281// CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]] 282 283// ----- 284 285#map = affine_map<(d0, d1) -> (d0, d1)> 286// CHECK: func @pad_generic_static 287// CHECK-DAG: %[[C0:.*]] = constant 0 : index 288// CHECK-DAG: %[[C16:.*]] = constant 16 : index 289// CHECK-DAG: %[[C32:.*]] = constant 32 : index 290// CHECK-DAG: %[[C64:.*]] = constant 64 : index 291// CHECK-DAG: %[[C128:.*]] = constant 128 : index 292// CHECK: scf.for %{{.*}} = %[[C0]] to %[[C64]] step %[[C16]] 293// CHECK: %[[CMPI1:.*]] = cmpi eq 294// CHECK: scf.for %{{.*}} = %[[C0]] to %[[C128]] step %[[C32]] 295// CHECK: %[[CMPI2:.*]] = cmpi eq 296// CHECK: %[[HASZERO:.*]] = or %[[CMPI2]], %[[CMPI1]] : i1 297// CHECK: scf.if %[[HASZERO]] 298// CHECK: tensor.generate 299// CHECK: else 300// CHECK: tensor.extract_slice 301// CHECK: linalg.pad_tensor 302// CHECK: tensor.cast 303// CHECK: tensor.extract_slice 304// CHECK: tensor.extract_slice 305// CHECK: linalg.generic 306// CHECK: tensor.insert_slice 307func @pad_generic_static(%small_input: tensor<58x1xf32>, %large_input: tensor<64x128xf32>) -> tensor<64x128xf32> { 308 %c0 = constant 0 : index 309 %c1 = constant 1 : index 310 %c16 = constant 16 : index 311 %c32 = constant 32 : index 312 %zero = constant 0.0 : f32 313 314 %d0 = tensor.dim %large_input, %c0 : tensor<64x128xf32> 315 %d1 = tensor.dim %large_input, %c1 : tensor<64x128xf32> 316 317 %pad = linalg.pad_tensor %small_input low[4, 60] high[2, 67] { 318 ^bb0(%arg0: index, %arg1: index): 319 linalg.yield %zero : f32 320 } : tensor<58x1xf32> to tensor<64x128xf32> 321 322 %fill = linalg.fill(%zero, %large_input) : f32, tensor<64x128xf32> -> tensor<64x128xf32> 323 324 %for0 = scf.for %iv0 = %c0 to %d0 step %c16 iter_args(%arg0 = %fill) -> tensor<64x128xf32> { 325 %for1 = scf.for %iv1 = %c0 to %d1 step %c32 iter_args(%arg1 = %arg0) -> tensor<64x128xf32> { 326 %0 = tensor.extract_slice %pad[%iv0, %iv1][16, 32][1, 1] : tensor<64x128xf32> to tensor<16x32xf32> 327 %1 = tensor.extract_slice %large_input[%iv0, %iv1][16, 32][1, 1] : tensor<64x128xf32> to tensor<16x32xf32> 328 %2 = tensor.extract_slice %arg1[%iv0, %iv1][16, 32][1, 1] : tensor<64x128xf32> to tensor<16x32xf32> 329 330 %add = linalg.generic 331 {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} 332 ins(%0, %1 : tensor<16x32xf32>, tensor<16x32xf32>) outs(%2 : tensor<16x32xf32>) { 333 ^bb0(%arg4: f32, %arg5: f32, %arg6: f32): 334 %result = addf %arg4, %arg5 : f32 335 linalg.yield %result : f32 336 } -> tensor<16x32xf32> 337 338 %insert = tensor.insert_slice %add into %arg1[%iv0, %iv1] [16, 32] [1, 1] : tensor<16x32xf32> into tensor<64x128xf32> 339 scf.yield %insert : tensor<64x128xf32> 340 } 341 scf.yield %for1 : tensor<64x128xf32> 342 } 343 return %for0 : tensor<64x128xf32> 344} 345