1// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck %s 2// RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck --check-prefix=CHECKPARALLEL %s 3 4// Test that we can lower all the way to LLVM without crashing; results are not checked here. 5// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -o=/dev/null 2>&1 6 7// CHECK-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> 8// CHECK-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> 9// CHECK-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> 10// CHECK-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> 11// CHECK-DAG: #[[$clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> 12 13// CHECK-DAG: #[[$stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> 14// CHECK-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> 15// CHECK-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> 16// CHECK-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> 17// CHECK-DAG: #[[$stride1Dilation1Padding1:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 1)> 18// CHECK-DAG: #[[$stride1Dilation1Padding2:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 2)> 19 20// CHECKPARALLEL-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> 21// CHECKPARALLEL-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> 22// CHECKPARALLEL-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> 23// CHECKPARALLEL-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> 24// CHECKPARALLEL-DAG: #[[$clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> 25 26// CHECKPARALLEL-DAG: #[[$stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> 27// CHECKPARALLEL-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> 28// 
CHECKPARALLEL-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> 29// CHECKPARALLEL-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> 30// CHECKPARALLEL-DAG: #[[$stride1Dilation1Padding1:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 1)> 31// CHECKPARALLEL-DAG: #[[$stride1Dilation1Padding2:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 2)> 32 33func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) { 34 %c0 = constant 0 : index 35 %c1 = constant 1 : index 36 %A = memref.view %arg0[%c0][%M, %K] : memref<?xi8> to memref<?x?xf32> 37 %B = memref.view %arg0[%c0][%K, %N] : memref<?xi8> to memref<?x?xf32> 38 %C = memref.view %arg0[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32> 39 linalg.matmul ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>) 40 outs(%C: memref<?x?xf32>) 41 return 42} 43// CHECK-LABEL: func @matmul(%{{.*}}: memref<?xi8>, 44// CHECK-SAME: [[M:arg[0-9]+]]: index 45// CHECK-SAME: [[N:arg[0-9]+]]: index 46// CHECK-SAME: [[K:arg[0-9]+]]: index 47// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 48// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 49// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 50// CHECK: scf.for {{.*}} to %[[M]] 51// CHECK: scf.for {{.*}} to %[[N]] 52// CHECK: scf.for {{.*}} to %[[K]] 53// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 54// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 55// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 56// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 57// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 58// CHECK: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 59 60// CHECKPARALLEL-LABEL: func @matmul(%{{.*}}: memref<?xi8>, 61// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index 62// CHECKPARALLEL-SAME: [[N:arg[0-9]+]]: index 63// CHECKPARALLEL-SAME: 
[[K:arg[0-9]+]]: index 64// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 65// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 66// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 67// CHECKPARALLEL: scf.parallel {{.*}} to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}}) { 68// CHECKPARALLEL: scf.for {{.*}} to %[[K]] 69// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 70// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 71// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 72// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 73// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 74// CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 75 76 77 78func @matvec(%arg0: memref<?xi8>, %M: index, %N: index) { 79 %c0 = constant 0 : index 80 %c1 = constant 1 : index 81 %2 = memref.view %arg0[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32> 82 %3 = memref.view %arg0[%c0][%M] : memref<?xi8> to memref<?xf32> 83 %4 = memref.view %arg0[%c0][%N] : memref<?xi8> to memref<?xf32> 84 linalg.matvec ins(%2, %3: memref<?x?xf32>, memref<?xf32>) 85 outs(%4 : memref<?xf32>) 86 return 87} 88// CHECK-LABEL: func @matvec(%{{.*}}: memref<?xi8>, 89// CHECK-SAME: [[M:arg[0-9]+]]: index 90// CHECK-SAME: [[K:arg[0-9]+]]: index 91// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 92// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32> 93// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32> 94// CHECK: scf.for {{.*}} to %[[M]] 95// CHECK: scf.for {{.*}} to %[[K]] 96// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 97// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32> 98// CHECK-DAG: %[[inc:.*]] = 
mulf %[[a]], %[[b]] : f32 99// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref<?xf32> 100// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 101// CHECK: store %[[res]], %[[C]][%{{.*}}] : memref<?xf32> 102 103// CHECKPARALLEL-LABEL: func @matvec(%{{.*}}: memref<?xi8>, 104// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index 105// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index 106// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 107// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32> 108// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32> 109// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) { 110// CHECKPARALLEL: scf.for {{.*}} to %[[K]] 111// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 112// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32> 113// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 114// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref<?xf32> 115// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 116// CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}] : memref<?xf32> 117 118 119func @dot(%arg0: memref<?xi8>, %M: index) { 120 %c0 = constant 0 : index 121 %c1 = constant 1 : index 122 %1 = memref.view %arg0[%c0][%M] : memref<?xi8> to memref<?xf32> 123 %2 = memref.view %arg0[%c0][%M] : memref<?xi8> to memref<?xf32> 124 %3 = memref.view %arg0[%c0][] : memref<?xi8> to memref<f32> 125 linalg.dot ins(%1, %2 : memref<?xf32>, memref<?xf32>) 126 outs(%3 : memref<f32>) 127 return 128} 129// CHECK-LABEL: func @dot(%{{.*}}: memref<?xi8>, 130// CHECK-SAME: [[K:arg[0-9]+]]: index 131// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32> 132// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32> 133// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : 
memref<?xi8> to memref<f32> 134// CHECK: scf.for {{.*}} to %[[K]] 135// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref<?xf32> 136// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32> 137// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 138// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][] : memref<f32> 139// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 140// CHECK: store %[[res]], %[[C]][] : memref<f32> 141 142// CHECKPARALLEL-LABEL: func @dot(%{{.*}}: memref<?xi8>, 143// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index 144// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32> 145// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32> 146// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref<?xi8> to memref<f32> 147// CHECKPARALLEL: scf.for {{.*}} to %[[K]] 148// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref<?xf32> 149// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32> 150// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 151// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %[[C]][] : memref<f32> 152// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 153// CHECKPARALLEL: store %[[res]], %[[C]][] : memref<f32> 154 155 156func @dot_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, offset: ?, strides: [1]>, %arg2: memref<f32>) { 157 linalg.dot ins(%arg0, %arg1 : memref<?xf32, offset: ?, strides: [1]>, 158 memref<?xf32, offset: ?, strides: [1]>) 159 outs(%arg2: memref<f32>) 160 return 161} 162// CHECK-LABEL: func @dot_view( 163// CHECK: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<f32>) { 164// CHECK: %[[K:.*]] = memref.dim %arg0, %c0 : memref<?xf32, #[[$strided1D]]> 165// CHECK: scf.for {{.*}} to %[[K]] 166// CHECK-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref<?xf32, #[[$strided1D]]> 167// 
CHECK-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]> 168// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 169// CHECK-DAG: %[[c:.*]] = memref.load %{{.*}}[] : memref<f32> 170// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 171// CHECK: store %[[res]], %{{.*}}[] : memref<f32> 172 173// CHECKPARALLEL-LABEL: func @dot_view( 174// CHECKPARALLEL: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<f32>) { 175// CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c0 : memref<?xf32, #[[$strided1D]]> 176// CHECKPARALLEL: scf.for {{.*}} to %[[K]] 177// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref<?xf32, #[[$strided1D]]> 178// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]> 179// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 180// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %{{.*}}[] : memref<f32> 181// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 182// CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref<f32> 183 184func @fill_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: f32) { 185 linalg.fill(%arg1, %arg0) : f32, memref<?xf32, offset: ?, strides: [1]> 186 return 187} 188// CHECK-LABEL: func @fill_view( 189// CHECK: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: f32) { 190// CHECK: scf.for {{.*}} to %{{.*}} 191// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]> 192 193// CHECKPARALLEL-LABEL: func @fill_view( 194// CHECKPARALLEL: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: f32) { 195// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { 196// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]> 197 198func @fill_view0(%arg0: memref<f32>, %arg1: f32) { 199 linalg.fill(%arg1, %arg0) : f32, memref<f32> 200 return 201} 202// CHECK-LABEL: func @fill_view0(%{{.*}}: memref<f32>, %{{.*}}: f32) { 203// CHECK: 
store %{{.*}}, %{{.*}}[] : memref<f32> 204 205// CHECKPARALLEL-LABEL: func @fill_view0(%{{.*}}: memref<f32>, %{{.*}}: f32) { 206// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref<f32> 207 208func @fill_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: f32) { 209 linalg.fill(%arg1, %arg0) : f32, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]> 210 return 211} 212// CHECK-LABEL: func @fill_view3( 213// CHECK: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: f32) { 214// CHECK: scf.for {{.*}} to %{{.*}} 215// CHECK: scf.for {{.*}} to %{{.*}} 216// CHECK: scf.for {{.*}} to %{{.*}} 217// CHECK: store %{{.*}}, {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 218 219// CHECKPARALLEL-LABEL: func @fill_view3( 220// CHECKPARALLEL: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: f32) { 221// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { 222// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 223 224func @copy_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, offset: ?, strides: [1]>) { 225 linalg.copy(%arg0, %arg1) : memref<?xf32, offset: ?, strides: [1]>, memref<?xf32, offset: ?, strides: [1]> 226 return 227} 228// CHECK-LABEL: func @copy_view( 229// CHECK: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<?xf32, #[[$strided1D]]>) { 230// CHECK: scf.for {{.*}} to %{{.*}} 231// CHECK: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]> 232// CHECK: store %[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]> 233 234// CHECKPARALLEL-LABEL: func @copy_view( 235// CHECKPARALLEL: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: memref<?xf32, #[[$strided1D]]>) { 236// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { 237// CHECKPARALLEL: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]> 238// CHECKPARALLEL: store 
%[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, #[[$strided1D]]> 239 240func @copy_view0(%arg0: memref<f32>, %arg1: memref<f32>) { 241 linalg.copy(%arg0, %arg1) : memref<f32>, memref<f32> 242 return 243} 244// CHECK-LABEL: func @copy_view0(%{{.*}}: memref<f32>, %{{.*}}: memref<f32>) { 245// CHECK: memref.load %{{.*}}[] : memref<f32> 246// CHECK: store %{{.*}}, %{{.*}}[] : memref<f32> 247 248// CHECKPARALLEL-LABEL: func @copy_view0(%{{.*}}: memref<f32>, %{{.*}}: memref<f32>) { 249// CHECKPARALLEL: memref.load %{{.*}}[] : memref<f32> 250// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref<f32> 251 252func @copy_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) { 253 linalg.copy(%arg0, %arg1) {inputPermutation = affine_map<(i, j, k) -> (i, k, j)>, 254 outputPermutation = affine_map<(i, j, k) -> (k, j, i)>} : 255 memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]> 256 return 257} 258// CHECK-LABEL: func @copy_view3 259// CHECK: (%{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>) { 260// CHECK: scf.for {{.*}} to %{{.*}} 261// CHECK: scf.for {{.*}} to %{{.*}} 262// CHECK: scf.for {{.*}} to %{{.*}} 263// CHECK: %[[L:.*]] = memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 264// CHECK: store %[[L]], {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 265 266// CHECKPARALLEL-LABEL: func @copy_view3 267// CHECKPARALLEL: (%{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>) { 268// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { 269// CHECKPARALLEL: %[[L:.*]] = memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 270// CHECKPARALLEL: store %[[L]], {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 271 272func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: 
memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) { 273 linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]> 274 return 275} 276// CHECK-LABEL: func @conv_view3( 277// CHECK: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>) { 278// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?xf32, #[[$strided3D]]> 279// CHECK: %[[Q:.*]] = memref.dim %arg0, %c1 : memref<?x?x?xf32, #[[$strided3D]]> 280// CHECK: %[[K:.*]] = memref.dim %arg0, %c2 : memref<?x?x?xf32, #[[$strided3D]]> 281// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?xf32, #[[$strided3D]]> 282// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]> 283// CHECK: scf.for {{.*}} to %[[B]] 284// CHECK: scf.for {{.*}} to %[[X0]] 285// CHECK: scf.for {{.*}} to %[[K]] 286// CHECK: scf.for {{.*}} to %[[Q]] 287// CHECK: scf.for {{.*}} to %[[Z0]] 288// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]] 289// CHECK: memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]> 290// CHECK: memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 291// CHECK: mulf 292// CHECK: memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 293// CHECK: addf 294// CHECK: store %{{.*}}, {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 295 296// CHECKPARALLEL-LABEL: func @conv_view3( 297// CHECKPARALLEL: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>) { 298// CHECKPARALLEL: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?xf32, #[[$strided3D]]> 299// CHECKPARALLEL: %[[Q:.*]] = memref.dim %arg0, %c1 : memref<?x?x?xf32, #[[$strided3D]]> 300// CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c2 : 
memref<?x?x?xf32, #[[$strided3D]]> 301// CHECKPARALLEL: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?xf32, #[[$strided3D]]> 302// CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]> 303// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { 304// CHECKPARALLEL: scf.for {{.*}} to %[[Q]] 305// CHECKPARALLEL: scf.for {{.*}} to %[[Z0]] 306// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]] 307// CHECKPARALLEL: memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]> 308// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 309// CHECKPARALLEL: mulf 310// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 311// CHECKPARALLEL: addf 312// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref<?x?x?xf32, #[[$strided3D]]> 313 314func @conv_view4(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg2: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>) { 315 linalg.conv(%arg0, %arg1, %arg2) {dilations = [4, 5], strides = [2, 3]} : memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]> 316 return 317} 318// CHECK-LABEL: func @conv_view4( 319// CHECK: %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>) { 320// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?x?xf32, #[[$strided4D]]> 321// CHECK: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref<?x?x?x?xf32, #[[$strided4D]]> 322// CHECK: %[[Q:.*]] = memref.dim %arg0, %c2 : memref<?x?x?x?xf32, #[[$strided4D]]> 323// CHECK: %[[K:.*]] = memref.dim %arg0, %c3 : memref<?x?x?x?xf32, #[[$strided4D]]> 324// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : 
memref<?x?x?x?xf32, #[[$strided4D]]> 325// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32, #[[$strided4D]]> 326// CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32, #[[$strided4D]]> 327// CHECK: scf.for {{.*}} to %[[B]] 328// CHECK: scf.for {{.*}} to %[[X0]] 329// CHECK: scf.for {{.*}} to %[[X1]] 330// CHECK: scf.for {{.*}} to %[[K]] 331// CHECK: scf.for {{.*}} to %[[Q]] 332// CHECK: scf.for {{.*}} to %[[Z0]] 333// CHECK: scf.for {{.*}} to %[[Z1]] 334// CHECK: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]] 335// CHECK: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]] 336// CHECK: memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]> 337// CHECK: memref.load {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]> 338// CHECK: mulf 339// CHECK: memref.load {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]> 340// CHECK: addf 341// CHECK: store %{{.*}}, {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]> 342 343// CHECKPARALLEL-LABEL: func @conv_view4( 344// CHECKPARALLEL: %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[$strided4D]]>) { 345// CHECKPARALLEL: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?x?xf32, #[[$strided4D]]> 346// CHECKPARALLEL: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref<?x?x?x?xf32, #[[$strided4D]]> 347// CHECKPARALLEL: %[[Q:.*]] = memref.dim %arg0, %c2 : memref<?x?x?x?xf32, #[[$strided4D]]> 348// CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c3 : memref<?x?x?x?xf32, #[[$strided4D]]> 349// CHECKPARALLEL: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32, #[[$strided4D]]> 350// CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32, #[[$strided4D]]> 351// CHECKPARALLEL: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32, #[[$strided4D]]> 352// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) 
step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { 353// CHECKPARALLEL: scf.for {{.*}} to %[[Q]] 354// CHECKPARALLEL: scf.for {{.*}} to %[[Z0]] 355// CHECKPARALLEL: scf.for {{.*}} to %[[Z1]] 356// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]] 357// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]] 358// CHECKPARALLEL: memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]> 359// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]> 360// CHECKPARALLEL: mulf 361// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]> 362// CHECKPARALLEL: addf 363// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref<?x?x?x?xf32, #[[$strided4D]]> 364 365func @conv_padding(%arg0: memref<?x?x?x?xf32>, 366 %arg1: memref<?x?x?x?xf32>, 367 %arg2: memref<?x?x?x?xf32>) { 368 linalg.conv(%arg0, %arg1, %arg2) {dilations = [1, 1], 369 padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>, 370 strides = [1, 1]} : 371 memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32> 372 return 373} 374// CHECK-LABEL: func @conv_padding 375// CHECK: %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>) { 376// CHECK: %[[ZERO:.*]] = constant 0.000000e+00 : f32 377// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?x?xf32> 378// CHECK: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref<?x?x?x?xf32> 379// CHECK: %[[Q:.*]] = memref.dim %arg0, %c2 : memref<?x?x?x?xf32> 380// CHECK: %[[K:.*]] = memref.dim %arg0, %c3 : memref<?x?x?x?xf32> 381// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32> 382// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32> 383// CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32> 384// CHECK: scf.for {{.*}} to %[[B]] 385// CHECK: scf.for {{.*}} to %[[X0]] 386// CHECK: scf.for {{.*}} to %[[X1]] 387// CHECK: scf.for {{.*}} to %[[K]] 388// CHECK: scf.for {{.*}} to %[[Q]] 389// CHECK: scf.for {{.*}} to %[[Z0]] 
390// CHECK: scf.for {{.*}} to %[[Z1]] 391// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}} 392// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}} 393// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) 394// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]]) 395// CHECK: memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32> 396// CHECK: select %{{.*}}, 397// CHECK: memref.load {{.*}} : memref<?x?x?x?xf32> 398// CHECK: mulf 399// CHECK: memref.load {{.*}} : memref<?x?x?x?xf32> 400// CHECK: addf 401// CHECK: store %{{.*}}, {{.*}} : memref<?x?x?x?xf32> 402 403// CHECKPARALLEL-LABEL: func @conv_padding 404// CHECKPARALLEL: %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>) { 405// CHECKPARALLEL: %[[ZERO:.*]] = constant 0.000000e+00 : f32 406// CHECKPARALLEL: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref<?x?x?x?xf32> 407// CHECKPARALLEL: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref<?x?x?x?xf32> 408// CHECKPARALLEL: %[[Q:.*]] = memref.dim %arg0, %c2 : memref<?x?x?x?xf32> 409// CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c3 : memref<?x?x?x?xf32> 410// CHECKPARALLEL: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32> 411// CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32> 412// CHECKPARALLEL: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32> 413// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { 414// CHECKPARALLEL: scf.for {{.*}} to %[[Q]] 415// CHECKPARALLEL: scf.for {{.*}} to %[[Z0]] 416// CHECKPARALLEL: scf.for {{.*}} to %[[Z1]] 417// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}} 418// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}} 419// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) 420// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]]) 421// CHECKPARALLEL: memref.load 
%{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32> 422// CHECKPARALLEL: select %{{.*}}, 423// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?x?xf32> 424// CHECKPARALLEL: mulf 425// CHECKPARALLEL: memref.load {{.*}} : memref<?x?x?x?xf32> 426// CHECKPARALLEL: addf 427// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref<?x?x?x?xf32> 428 429func @pooling_max(%arg0: memref<?x?xf32>, 430 %arg1: memref<?x?xi32>, 431 %arg2: memref<?x?xf32>) { 432 linalg.pooling_max(%arg0, %arg1, %arg2) { strides = [2, 1] }: 433 memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32> 434 return 435} 436// CHECK-LABEL: func @pooling_max 437// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 438// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 439// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 440// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 441// CHECK: scf.for {{.*}} to %[[OX]] 442// CHECK: scf.for {{.*}} to %[[OY]] 443// CHECK: scf.for {{.*}} to %[[WX]] 444// CHECK: scf.for {{.*}} to %[[WY]] 445// CHECK: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]] 446// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]] 447// CHECK: memref.load {{.*}} : memref<?x?xf32> 448// CHECK: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32> 449// CHECK: %[[RES:.*]] = select %{{.*}}, 450// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32> 451 452// CHECKPARALLEL-LABEL: func @pooling_max 453// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 454// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 455// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 456// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 457// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) 458// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] 459// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] 460// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]] 461// CHECKPARALLEL: %[[IY:.*]] = 
affine.apply #[[$stride1Dilation1]] 462// CHECKPARALLEL: memref.load {{.*}} : memref<?x?xf32> 463// CHECKPARALLEL: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32> 464// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, 465// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32> 466 467func @pooling_max_padding(%arg0: memref<?x?xf32>, 468 %arg1: memref<?x?xi32>, 469 %arg2: memref<?x?xf32>) { 470 linalg.pooling_max(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : 471 memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32> 472 return 473} 474// CHECK-LABEL: func @pooling_max_padding 475// CHECK: %[[PAD:.*]] = constant 0xFF800000 : f32 476// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 477// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 478// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 479// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 480// CHECK: scf.for {{.*}} to %[[OX]] 481// CHECK: scf.for {{.*}} to %[[OY]] 482// CHECK: scf.for {{.*}} to %[[WX]] 483// CHECK: scf.for {{.*}} to %[[WY]] 484// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 485// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 486// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32> 487// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 488// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 489// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32> 490// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 491// CHECK: %[[CMP:.*]] = cmpf ogt, %[[RHS]], %[[SEL]] : f32 492// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 493// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32> 494 495// CHECKPARALLEL-LABEL: func @pooling_max_padding 496// CHECKPARALLEL: %[[PAD:.*]] = constant 0xFF800000 : f32 497// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 498// CHECKPARALLEL: %[[WY:.*]] = memref.dim 
%arg1, %c1 : memref<?x?xi32> 499// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 500// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 501// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) 502// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] 503// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] 504// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 505// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 506// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32> 507// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 508// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 509// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32> 510// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 511// CHECKPARALLEL: %[[CMP:.*]] = cmpf ogt, %[[RHS]], %[[SEL]] : f32 512// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 513// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32> 514 515func @pooling_max_padding_i32(%arg0: memref<?x?xi32>, 516 %arg1: memref<?x?xi32>, 517 %arg2: memref<?x?xi32>) { 518 linalg.pooling_max(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : 519 memref<?x?xi32>, memref<?x?xi32>, memref<?x?xi32> 520 return 521} 522// CHECK-LABEL: func @pooling_max_padding_i32 523// CHECK: %[[PAD:.*]] = constant -2147483648 : i32 524// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 525// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 526// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32> 527// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32> 528// CHECK: scf.for {{.*}} to %[[OX]] 529// CHECK: scf.for {{.*}} to %[[OY]] 530// CHECK: scf.for {{.*}} to %[[WX]] 531// CHECK: scf.for {{.*}} to %[[WY]] 532// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 533// CHECK: %[[IY:.*]] = 
affine.apply #[[$stride1Dilation1Padding1]] 534// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32> 535// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 536// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 537// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32> 538// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 539// CHECK: %[[CMP:.*]] = cmpi sgt, %[[RHS]], %[[SEL]] : i32 540// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 541// CHECK: store %[[RES]], {{.*}} : memref<?x?xi32> 542 543// CHECKPARALLEL-LABEL: func @pooling_max_padding_i32 544// CHECKPARALLEL: %[[PAD:.*]] = constant -2147483648 : i32 545// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 546// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 547// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32> 548// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32> 549// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) 550// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] 551// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] 552// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 553// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 554// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32> 555// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 556// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 557// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32> 558// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 559// CHECKPARALLEL: %[[CMP:.*]] = cmpi sgt, %[[RHS]], %[[SEL]] : i32 560// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 561// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xi32> 562 563func @pooling_min(%arg0: memref<?x?xf32>, 564 %arg1: memref<?x?xi32>, 565 %arg2: 
memref<?x?xf32>) { 566 linalg.pooling_min(%arg0, %arg1, %arg2) { strides = [2, 1] }: 567 memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32> 568 return 569} 570// CHECK-LABEL: func @pooling_min 571// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 572// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 573// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 574// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 575// CHECK: scf.for {{.*}} to %[[OX]] 576// CHECK: scf.for {{.*}} to %[[OY]] 577// CHECK: scf.for {{.*}} to %[[WX]] 578// CHECK: scf.for {{.*}} to %[[WY]] 579// CHECK: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]] 580// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]] 581// CHECK: memref.load {{.*}} : memref<?x?xf32> 582// CHECK: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32> 583// CHECK: %[[RES:.*]] = select %{{.*}}, 584// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32> 585 586// CHECKPARALLEL-LABEL: func @pooling_min 587// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 588// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 589// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 590// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 591// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) 592// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] 593// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] 594// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]] 595// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]] 596// CHECKPARALLEL: memref.load {{.*}} : memref<?x?xf32> 597// CHECKPARALLEL: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32> 598// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, 599// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32> 600 601func @pooling_min_padding(%arg0: memref<?x?xf32>, 602 %arg1: memref<?x?xi32>, 603 %arg2: memref<?x?xf32>) { 604 linalg.pooling_min(%arg0, %arg1, 
%arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : 605 memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32> 606 return 607} 608// CHECK-LABEL: func @pooling_min_padding 609// CHECK: %[[PAD:.*]] = constant 0x7F800000 : f32 610// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 611// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 612// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 613// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 614// CHECK: scf.for {{.*}} to %[[OX]] 615// CHECK: scf.for {{.*}} to %[[OY]] 616// CHECK: scf.for {{.*}} to %[[WX]] 617// CHECK: scf.for {{.*}} to %[[WY]] 618// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 619// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 620// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32> 621// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 622// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 623// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32> 624// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 625// CHECK: %[[CMP:.*]] = cmpf olt, %[[RHS]], %[[SEL]] : f32 626// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 627// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32> 628 629// CHECKPARALLEL-LABEL: func @pooling_min_padding 630// CHECKPARALLEL: %[[PAD:.*]] = constant 0x7F800000 : f32 631// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 632// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 633// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 634// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 635// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) 636// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] 637// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] 638// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 639// CHECKPARALLEL: 
%[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 640// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32> 641// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 642// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 643// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32> 644// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 645// CHECKPARALLEL: %[[CMP:.*]] = cmpf olt, %[[RHS]], %[[SEL]] : f32 646// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 647// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32> 648 649func @pooling_min_padding_i32(%arg0: memref<?x?xi32>, 650 %arg1: memref<?x?xi32>, 651 %arg2: memref<?x?xi32>) { 652 linalg.pooling_min(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : 653 memref<?x?xi32>, memref<?x?xi32>, memref<?x?xi32> 654 return 655} 656// CHECK-LABEL: func @pooling_min_padding_i32 657// CHECK: %[[PAD:.*]] = constant 2147483647 : i32 658// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 659// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 660// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32> 661// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32> 662// CHECK: scf.for {{.*}} to %[[OX]] 663// CHECK: scf.for {{.*}} to %[[OY]] 664// CHECK: scf.for {{.*}} to %[[WX]] 665// CHECK: scf.for {{.*}} to %[[WY]] 666// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 667// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 668// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32> 669// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 670// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 671// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32> 672// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 673// CHECK: %[[CMP:.*]] = cmpi slt, %[[RHS]], 
%[[SEL]] : i32 674// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 675// CHECK: store %[[RES]], {{.*}} : memref<?x?xi32> 676 677// CHECKPARALLEL-LABEL: func @pooling_min_padding_i32 678// CHECKPARALLEL: %[[PAD:.*]] = constant 2147483647 : i32 679// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 680// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 681// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32> 682// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32> 683// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) 684// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] 685// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] 686// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 687// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 688// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32> 689// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 690// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 691// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32> 692// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 693// CHECKPARALLEL: %[[CMP:.*]] = cmpi slt, %[[RHS]], %[[SEL]] : i32 694// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 695// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xi32> 696 697func @pooling_sum(%arg0: memref<?x?xf32>, 698 %arg1: memref<?x?xi32>, 699 %arg2: memref<?x?xf32>) { 700 linalg.pooling_sum(%arg0, %arg1, %arg2) { strides = [2, 1] }: 701 memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32> 702 return 703} 704// CHECK-LABEL: func @pooling_sum 705// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 706// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 707// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 708// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : 
memref<?x?xf32> 709// CHECK: scf.for {{.*}} to %[[OX]] 710// CHECK: scf.for {{.*}} to %[[OY]] 711// CHECK: scf.for {{.*}} to %[[WX]] 712// CHECK: scf.for {{.*}} to %[[WY]] 713// CHECK: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]] 714// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]] 715// CHECK: %[[RHS:.*]] = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32> 716// CHECK: %[[LHS:.*]] = memref.load {{.*}} : memref<?x?xf32> 717// CHECK: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 718// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32> 719 720// CHECKPARALLEL-LABEL: func @pooling_sum 721// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 722// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 723// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 724// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 725// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) 726// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] 727// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] 728// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]] 729// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]] 730// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref<?x?xf32> 731// CHECKPARALLEL: %[[LHS:.*]] = memref.load {{.*}} : memref<?x?xf32> 732// CHECKPARALLEL: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 733// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32> 734 735func @pooling_sum_padding(%arg0: memref<?x?xf32>, 736 %arg1: memref<?x?xi32>, 737 %arg2: memref<?x?xf32>) { 738 linalg.pooling_sum(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : 739 memref<?x?xf32>, memref<?x?xi32>, memref<?x?xf32> 740 return 741} 742// CHECK-LABEL: func @pooling_sum_padding 743// CHECK: %[[PAD:.*]] = constant 0.000000e+00 : f32 744// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 745// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : 
memref<?x?xi32> 746// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 747// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 748// CHECK: scf.for {{.*}} to %[[OX]] 749// CHECK: scf.for {{.*}} to %[[OY]] 750// CHECK: scf.for {{.*}} to %[[WX]] 751// CHECK: scf.for {{.*}} to %[[WY]] 752// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 753// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 754// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 755// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 756// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32> 757// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 758// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xf32> 759// CHECK: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32 760// CHECK: store %[[RES]], {{.*}} : memref<?x?xf32> 761 762// CHECKPARALLEL-LABEL: func @pooling_sum_padding 763// CHECKPARALLEL: %[[PAD:.*]] = constant 0.000000e+00 : f32 764// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 765// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 766// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xf32> 767// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xf32> 768// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) 769// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] 770// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] 771// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 772// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 773// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 774// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 775// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xf32> 776// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 777// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} 
: memref<?x?xf32> 778// CHECKPARALLEL: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32 779// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xf32> 780 781func @pooling_sum_padding_i32(%arg0: memref<?x?xi32>, 782 %arg1: memref<?x?xi32>, 783 %arg2: memref<?x?xi32>) { 784 linalg.pooling_sum(%arg0, %arg1, %arg2) { padding = dense<[[2, 2], [1, 1]]> : tensor<2x2xi64> } : 785 memref<?x?xi32>, memref<?x?xi32>, memref<?x?xi32> 786 return 787} 788// CHECK-LABEL: func @pooling_sum_padding_i32 789// CHECK: %[[PAD:.*]] = constant 0 : i32 790// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 791// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 792// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32> 793// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32> 794// CHECK: scf.for {{.*}} to %[[OX]] 795// CHECK: scf.for {{.*}} to %[[OY]] 796// CHECK: scf.for {{.*}} to %[[WX]] 797// CHECK: scf.for {{.*}} to %[[WY]] 798// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 799// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 800// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 801// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 802// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32> 803// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 804// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32> 805// CHECK: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32 806// CHECK: store %[[RES]], {{.*}} : memref<?x?xi32> 807 808// CHECKPARALLEL-LABEL: func @pooling_sum_padding_i32 809// CHECKPARALLEL: %[[PAD:.*]] = constant 0 : i32 810// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref<?x?xi32> 811// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref<?x?xi32> 812// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref<?x?xi32> 813// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref<?x?xi32> 814// CHECKPARALLEL: scf.parallel {{.*}} to 
(%[[OX]], %[[OY]]) 815// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] 816// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] 817// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] 818// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] 819// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) 820// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) 821// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref<?x?xi32> 822// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 823// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref<?x?xi32> 824// CHECKPARALLEL: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32 825// CHECKPARALLEL: store %[[RES]], {{.*}} : memref<?x?xi32> 826 827#accesses = [ 828 affine_map<(i, j, k) -> (i, j)>, 829 affine_map<(i, j, k) -> (i, j, k)>, 830 affine_map<(i, j, k) -> (i, k, j)> 831] 832#trait2 = { 833 args_in = 1, 834 args_out = 2, 835 iterator_types = ["parallel", "parallel", "parallel"], 836 indexing_maps = #accesses, 837 library_call = "some_external_function_name_2", 838 doc = "B(i,j,k), C(i,k,j) = foo(A(i, j), B(i,j,k), C(i,k,j))" 839} 840func @generic_region(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) { 841 linalg.generic #trait2 842 ins(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>) 843 outs(%arg1, %arg2 : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, 844 memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) { 845 ^bb0(%a: f32, %b: f32, %c: f32): 846 %d = mulf %a, %b : f32 847 %e = addf %c, %d : f32 848 linalg.yield %d, %e : f32, f32 849 } 850 return 851} 852// CHECK-LABEL: @generic_region 853// CHECK: scf.for %[[i:.*]] = {{.*}} 854// CHECK: scf.for %[[j:.*]] = {{.*}} 855// CHECK: scf.for %[[k:.*]] = {{.*}} 856// CHECK: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, #[[$strided2D]]> 
857// CHECK: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[$strided3D]]> 858// CHECK: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[$strided3D]]> 859// CHECK: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 860// CHECK: %[[e:.*]] = addf %[[c]], %[[d]] : f32 861// CHECK: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[$strided3D]]> 862// CHECK: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[$strided3D]]> 863 864// CHECKPARALLEL-LABEL: @generic_region 865// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) 866// CHECKPARALLEL: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, #[[$strided2D]]> 867// CHECKPARALLEL: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[$strided3D]]> 868// CHECKPARALLEL: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[$strided3D]]> 869// CHECKPARALLEL: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 870// CHECKPARALLEL: %[[e:.*]] = addf %[[c]], %[[d]] : f32 871// CHECKPARALLEL: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, #[[$strided3D]]> 872// CHECKPARALLEL: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, #[[$strided3D]]> 873 874#trait4 = { 875 args_in = 1, 876 args_out = 2, 877 iterator_types = ["parallel", "parallel", "parallel"], 878 indexing_maps = #accesses, 879 library_call = "some_external_function_name_2", 880 doc = "B(i,j,k), C(i,k,j) = foo(A(i, j) * B(i,j,k), i + j + k + C(i,k,j))" 881} 882func @generic_index_region( 883 %arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, 884 %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, 885 %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) { 886 linalg.generic #trait4 887 ins(%arg0 : memref<?x?xf32, offset: ?, strides: [?, 1]>) 888 outs(%arg1, %arg2 : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, 889 memref<?x?x?xf32, offset: ?, 
strides: [?, ?, 1]>) { 890 ^bb0(%a: f32, %b: f32, %c: f32): 891 %i = linalg.index 0 : index 892 %j = linalg.index 1 : index 893 %k = linalg.index 2 : index 894 %result_1 = mulf %a, %b : f32 895 896 %ij = addi %i, %j : index 897 %ijk = addi %ij, %k : index 898 %ijk_int = index_cast %ijk : index to i32 899 %ijk_float = sitofp %ijk_int : i32 to f32 900 901 %result_2 = addf %c, %ijk_float : f32 902 linalg.yield %result_1, %result_2 : f32, f32 903 } 904 return 905} 906 907// CHECK-LABEL: @generic_index_region 908// CHECK: scf.for %[[i:.*]] = {{.*}} 909// CHECK: scf.for %[[j:.*]] = {{.*}} 910// CHECK: scf.for %[[k:.*]] = {{.*}} 911// CHECK: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] 912// CHECK: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] 913// CHECK: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] 914// CHECK: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 915// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index 916// CHECK: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index 917// CHECK: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32 918// CHECK: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32 919// CHECK: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32 920// CHECK: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] 921// CHECK: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] 922 923// CHECKPARALLEL-LABEL: @generic_index_region 924// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) 925// CHECKPARALLEL: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] 926// CHECKPARALLEL: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] 927// CHECKPARALLEL: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] 928// CHECKPARALLEL: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 929// CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index 930// CHECKPARALLEL: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index 931// CHECKPARALLEL: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32 932// CHECKPARALLEL: 
%[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32 933// CHECKPARALLEL: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32 934// CHECKPARALLEL: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] 935// CHECKPARALLEL: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] 936 937// ----- 938 939#broadcast_access = [ 940 affine_map<(i, j) -> ()>, 941 affine_map<(i, j) -> (i, j)> 942] 943 944#trait_broadcast = { 945 args_in = 1, 946 args_out = 1, 947 indexing_maps = #broadcast_access, 948 iterator_types = ["parallel", "parallel"], 949 library_call = "some_broadcast_external_fn" 950} 951 952func @generic_op_zero_rank(%arg0: memref<f32>, %arg1: memref<3x4xf32>) 953{ 954 linalg.generic #trait_broadcast 955 ins(%arg0 : memref<f32>) 956 outs(%arg1 : memref<3x4xf32>) { 957 ^bb(%a: f32, %b: f32) : 958 linalg.yield %a : f32 959 } 960 return 961} 962 963// CHECK-LABEL: @generic_op_zero_rank 964// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32> 965// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> 966// CHECK: scf.for %[[i:.*]] = {{.*}} 967// CHECK: scf.for %[[j:.*]] = {{.*}} 968// CHECK: %[[a:.*]] = memref.load %[[ARG0]][] 969// CHECK: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] 970 971// CHECKPARALLEL-LABEL: @generic_op_zero_rank 972// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32> 973// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> 974// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) 975// CHECKPARALLEL: %[[a:.*]] = memref.load %[[ARG0]][] 976// CHECKPARALLEL: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] 977 978func @generic_op_scalar(%arg0: f32, %arg1: memref<3x4xf32>) 979{ 980 linalg.generic #trait_broadcast 981 ins(%arg0 : f32) 982 outs(%arg1 : memref<3x4xf32>) { 983 ^bb(%a: f32, %b: f32) : 984 linalg.yield %a : f32 985 } 986 return 987} 988 989// CHECK-LABEL: @generic_op_scalar 990// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: f32 991// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> 992// CHECK: scf.for %[[i:.*]] = {{.*}} 
993// CHECK: scf.for %[[j:.*]] = {{.*}} 994// CHECK: store %[[ARG0]], %[[ARG1]][%[[i]], %[[j]]] 995 996// CHECKPARALLEL-LABEL: @generic_op_scalar 997// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: f32 998// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> 999// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) 1000// CHECKPARALLEL: store %[[ARG0]], %[[ARG1]][%[[i]], %[[j]]] 1001 1002func @generic_index_op_zero_rank(%arg0: memref<i32>, %arg1: memref<3x4xi32>) 1003{ 1004 linalg.generic #trait_broadcast 1005 ins(%arg0 : memref<i32>) 1006 outs(%arg1 : memref<3x4xi32>) { 1007 ^bb(%a: i32, %b: i32) : 1008 %i = linalg.index 0 : index 1009 %j = linalg.index 1 : index 1010 %ij = addi %i, %j : index 1011 %ij_int = index_cast %ij : index to i32 1012 %result = addi %a, %ij_int : i32 1013 linalg.yield %result : i32 1014 } 1015 return 1016} 1017 1018// CHECK-LABEL: @generic_index_op_zero_rank 1019// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32> 1020// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> 1021// CHECK: scf.for %[[i:.*]] = {{.*}} 1022// CHECK: scf.for %[[j:.*]] = {{.*}} 1023// CHECK: %[[a:.*]] = memref.load %[[ARG0]][ 1024// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index 1025// CHECK: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32 1026// CHECK: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32 1027// CHECK: store %[[result]], %[[ARG1]][%[[i]], %[[j]]] 1028 1029// CHECKPARALLEL-LABEL: @generic_index_op_zero_rank 1030// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32> 1031// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> 1032// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) 1033// CHECKPARALLEL: %[[a:.*]] = memref.load %[[ARG0]][ 1034// CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index 1035// CHECKPARALLEL: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32 1036// CHECKPARALLEL: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32 1037// CHECKPARALLEL: store %[[result]], 
%[[ARG1]][%[[i]], %[[j]]]

// -----------------------------------------------------------------------------
// 1-D reduction expressed with linalg.generic.
//
// A single "reduction" iterator accumulates the elements of %arg0 into the
// rank-0 memref %arg1 using addf. Both pipelines are expected to produce a
// sequential scf.for for the reduction dimension.
// -----------------------------------------------------------------------------
#reduce_1D_access = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> ()>
]

#trait_reduce_1D = {
  args_in = 1,
  args_out = 1,
  indexing_maps = #reduce_1D_access,
  iterator_types = ["reduction"],
  library_call = "some_reduce_external_fn"
}

func @generic_op_1D_reduce(%arg0: memref<?xf32>, %arg1: memref<f32>)
{
  linalg.generic #trait_reduce_1D
      ins(%arg0 : memref<?xf32>)
      outs(%arg1 : memref<f32>) {
    ^bb(%a: f32, %b: f32) :
      %0 = addf %a, %b : f32
      linalg.yield %0 : f32
  }
  return
}
// CHECK-LABEL: @generic_op_1D_reduce
//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
//  CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
//       CHECK: scf.for %[[i:.*]] = {{.*}}
//       CHECK:   %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
//       CHECK:   %[[b:.*]] = memref.load %[[ARG1]][]
//       CHECK:   %[[c:.*]] = addf %[[a]], %[[b]] : f32
//       CHECK:   memref.store %[[c]], %[[ARG1]][]

// CHECKPARALLEL-LABEL: @generic_op_1D_reduce
//  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
//  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
//       CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}}
//       CHECKPARALLEL:   %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
//       CHECKPARALLEL:   %[[b:.*]] = memref.load %[[ARG1]][]
//       CHECKPARALLEL:   %[[c:.*]] = addf %[[a]], %[[b]] : f32
//       CHECKPARALLEL:   memref.store %[[c]], %[[ARG1]][]


// -----------------------------------------------------------------------------
// 1-D reduction whose body reads the iteration index.
//
// The region obtains the loop index through linalg.index, compares it with 0
// and selects between the second input (%b) and the current output value (%c)
// before adding the selected value to %a. The result is written to %arg2.
// -----------------------------------------------------------------------------
#reduce_init_1D_access = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> ()>,
  affine_map<(i) -> ()>
]

#trait_reduce_init_1D = {
  args_in = 2,
  args_out = 1,
  indexing_maps = #reduce_init_1D_access,
  iterator_types = ["reduction"],
  library_call = "some_reduce_external_fn"
}

func @generic_index_op_1D_reduce(%arg0: memref<?xf32>,
                                 %arg1: memref<f32>,
                                 %arg2: memref<f32>)
{
  linalg.generic #trait_reduce_init_1D
      ins(%arg0, %arg1 : memref<?xf32>, memref<f32>)
      outs(%arg2 : memref<f32>) {
    ^bb(%a: f32, %b: f32, %c: f32) :
      %i = linalg.index 0 : index
      %0 = constant 0 : index
      %1 = cmpi eq, %0, %i : index
      %2 = select %1, %b, %c : f32
      %3 = addf %a, %2 : f32
      linalg.yield %3 : f32
  }
  return
}
// CHECK-LABEL: @generic_index_op_1D_reduce
//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
//  CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
//  CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
//       CHECK: scf.for %[[i:.*]] = {{.*}}
//       CHECK:   %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
//       CHECK:   %[[b:.*]] = memref.load %[[ARG1]][]
//       CHECK:   %[[c:.*]] = memref.load %[[ARG2]][]
//       CHECK:   %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]]
//       CHECK:   %[[e:.*]] = addf %[[a]], %[[d]]
//       CHECK:   memref.store %[[e]], %[[ARG2]][]

// CHECKPARALLEL-LABEL: @generic_index_op_1D_reduce
//  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
//  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
//  CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
//       CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}}
//       CHECKPARALLEL:   %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
//       CHECKPARALLEL:   %[[b:.*]] = memref.load %[[ARG1]][]
//       CHECKPARALLEL:   %[[c:.*]] = memref.load %[[ARG2]][]
//       CHECKPARALLEL:   %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]]
//       CHECKPARALLEL:   %[[e:.*]] = addf %[[a]], %[[d]]
//       CHECKPARALLEL:   memref.store %[[e]], %[[ARG2]][]

// -----------------------------------------------------------------------------
// Output-only linalg.generic that yields a value defined outside its region.
//
// The op has no inputs; the region yields the constant %cst for every element
// of %arg0. The sequential pipeline is expected to emit scf.for and the
// parallel pipeline scf.parallel, each storing the constant into %arg0.
// -----------------------------------------------------------------------------
#trait_const_fill = {
  args_in = 0,
  args_out = 1,
  indexing_maps = [affine_map<(i) -> (i)>],
  iterator_types = ["parallel"],
  library_call = "some_external_fn"
}
func @generic_const_init(%arg0: memref<?xf32>) {
  %cst = constant 1.0 : f32
  linalg.generic #trait_const_fill outs(%arg0 : memref<?xf32>) {
    ^bb0(%arg1: f32):   // no predecessors
      linalg.yield %cst : f32
  }
  return
}
// CHECK-LABEL: @generic_const_init
//  CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>
//       CHECK: %[[CONST:.*]] = constant 1.000000e+00 : f32
//       CHECK: scf.for %[[i:.*]] = {{.*}}
//       CHECK:   memref.store %[[CONST]], %[[ARG0]]

// CHECKPARALLEL-LABEL: @generic_const_init
//  CHECKPARALLEL-SAME: %[[ARG0:.*]]: memref<?xf32>
//       CHECKPARALLEL: %[[CONST:.*]] = constant 1.000000e+00 : f32
//       CHECKPARALLEL: scf.parallel (%[[i:.*]])
//       CHECKPARALLEL:   memref.store %[[CONST]], %[[ARG0]]

// -----------------------------------------------------------------------------
// Rank-0 linalg.generic (no iterators) whose region contains scf.if.
//
// All operands are memref<f32> with empty indexing maps, so the checks require
// that no scf.for is produced and that the scf.if / scf.yield structure of the
// region is preserved between the loads and the final store to %arg2.
// -----------------------------------------------------------------------------
#scalar_access = [
  affine_map<() -> ()>,
  affine_map<() -> ()>,
  affine_map<() -> ()>
]
#scalar_trait = {
  args_in = 2,
  args_out = 1,
  iterator_types = [],
  indexing_maps = #scalar_access,
  library_call = "some_external_fn"
}
func @scalar_code(%arg0: memref<f32>, %arg1 : memref<f32>, %arg2 : memref<f32>, %arg3 : i1)
{
  linalg.generic #scalar_trait
      ins(%arg0, %arg1 : memref<f32>, memref<f32>)
      outs(%arg2 : memref<f32>) {
    ^bb(%a : f32, %b : f32, %c : f32) :
      %result = scf.if %arg3 -> (f32) {
        scf.yield %a : f32
      } else {
        scf.yield %b : f32
      }
      linalg.yield %result : f32
  }
  return
}
// CHECK-LABEL: @scalar_code
//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
//  CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
//  CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
//   CHECK-NOT: scf.for
//       CHECK: memref.load %[[ARG0]][]
//       CHECK: memref.load %[[ARG1]][]
//       CHECK: scf.if
//       CHECK: scf.yield
//       CHECK: else
//       CHECK: scf.yield
//       CHECK: memref.store %{{.*}}, %[[ARG2]][]

// CHECKPARALLEL-LABEL: @scalar_code
//  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
//  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
//  CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
//   CHECKPARALLEL-NOT: scf.for
//       CHECKPARALLEL: memref.load %[[ARG0]][]
//       CHECKPARALLEL: memref.load %[[ARG1]][]
//       CHECKPARALLEL: scf.if
//       CHECKPARALLEL: scf.yield
//       CHECKPARALLEL: else
//       CHECKPARALLEL: scf.yield
//       CHECKPARALLEL: memref.store %{{.*}}, %[[ARG2]][]

//----------------------------------------------------------------------------//
// Named ops to loops.
//----------------------------------------------------------------------------//

// linalg.batch_matmul on fully dynamic rank-3 memrefs. The sequential pipeline
// is expected to emit four nested scf.for loops (b, m, n, k); the parallel
// pipeline an scf.parallel over (b, m, n) wrapping an scf.for over k.
// NOTE(review): the memref.dim checks below match the printed SSA names
// %c0/%c1/%c2 literally rather than through captured variables, so they depend
// on how the constants are named in the output - consider capturing them.
func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memref<?x?x?xf32>) {
  linalg.batch_matmul ins(%A, %B : memref<?x?x?xf32>, memref<?x?x?xf32>)
                     outs(%C : memref<?x?x?xf32>)
  return
}
// CHECK-LABEL: @named_batch_matmul
//  CHECK-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//  CHECK-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//  CHECK-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//       CHECK: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32>
//       CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
//       CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
//       CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
//       CHECK: scf.for %[[b:.*]] = %{{.*}} to %[[B]]
//       CHECK:   scf.for %[[m:.*]] = %{{.*}} to %[[M]]
//       CHECK:     scf.for %[[n:.*]] = %{{.*}} to %[[N]]
//       CHECK:       scf.for %[[k:.*]] = %{{.*}} to %[[K]]
//       CHECK:         %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
//       CHECK:         %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
//       CHECK:         %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
//       CHECK:         %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32
//       CHECK:         %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
//       CHECK:         memref.store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>

// CHECKPARALLEL-LABEL: @named_batch_matmul
//  CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//  CHECKPARALLEL-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//  CHECKPARALLEL-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//       CHECKPARALLEL: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32>
//       CHECKPARALLEL: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
//       CHECKPARALLEL: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
//       CHECKPARALLEL: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
//       CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) {
//       CHECKPARALLEL:   scf.for %[[k:.*]] = %{{.*}} to %[[K]]
//       CHECKPARALLEL:     %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
//       CHECKPARALLEL:     %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
//       CHECKPARALLEL:     %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
//       CHECKPARALLEL:     %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32
//       CHECKPARALLEL:     %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
//       CHECKPARALLEL:     memref.store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>


// linalg.conv_1d on dynamic 1-D memrefs. The checks expect an outer loop over
// the output dimension and an inner loop over the filter dimension, with the
// input indexed through the #stride1Dilation1 map (d0 + d1).
func @conv1d_no_symbols(%in : memref<?xf32>, %filter : memref<?xf32>, %out : memref<?xf32>) -> () {
  linalg.conv_1d ins(%in, %filter : memref<?xf32>, memref<?xf32>)
                outs(%out : memref<?xf32>)
  return
}

// CHECK-LABEL: @conv1d_no_symbols
//  CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?xf32>
//  CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?xf32>
//  CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32>
//       CHECK: %[[c0:.*]] = constant 0 : index
//       CHECK: %[[c1:.*]] = constant 1 : index
//       CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?xf32>
//       CHECK: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?xf32>
//       CHECK: scf.for %[[b:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
//       CHECK:   scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
//       CHECK:     %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]])
//       CHECK:     %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref<?xf32>
//       CHECK:     %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref<?xf32>
//       CHECK:     %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref<?xf32>
//       CHECK:     %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
//       CHECK:     %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
//       CHECK:     memref.store %[[res]], %[[arg2]][%[[b]]] : memref<?xf32>

// CHECKPARALLEL-LABEL: @conv1d_no_symbols
//  CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?xf32>
//  CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?xf32>
//  CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32>
//       CHECKPARALLEL: %[[c0:.*]] = constant 0 : index
//       CHECKPARALLEL: %[[c1:.*]] = constant 1 : index
//       CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?xf32>
//       CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?xf32>
//       CHECKPARALLEL: scf.parallel (%[[b:.*]]) = (%[[c0]]) to (%[[dim1]]) step (%[[c1]]) {
//       CHECKPARALLEL:   scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
//       CHECKPARALLEL:     %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]])
//       CHECKPARALLEL:     %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref<?xf32>
//       CHECKPARALLEL:     %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref<?xf32>
//       CHECKPARALLEL:     %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref<?xf32>
//       CHECKPARALLEL:     %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
//       CHECKPARALLEL:     %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
//       CHECKPARALLEL:     memref.store %[[res]], %[[arg2]][%[[b]]] : memref<?xf32>


// linalg.conv_2d on dynamic 2-D memrefs. The checks expect loops over the two
// output dimensions followed by loops over the two filter dimensions, with each
// input coordinate computed through the #stride1Dilation1 map.
func @conv2d_no_symbols(%in : memref<?x?xf32>, %filter : memref<?x?xf32>, %out : memref<?x?xf32>) -> () {
  linalg.conv_2d ins(%in, %filter : memref<?x?xf32>, memref<?x?xf32>)
                outs(%out: memref<?x?xf32>)
  return
}
// CHECK-LABEL: @conv2d_no_symbols
//  CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?xf32>
//  CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?xf32>
//  CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32>
//       CHECK: %[[c0:.*]] = constant 0 : index
//       CHECK: %[[c1:.*]] = constant 1 : index
//       CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?xf32>
//       CHECK: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?xf32>
//       CHECK: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?xf32>
//       CHECK: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?xf32>
//       CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
//       CHECK:   scf.for %[[arg4:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
//       CHECK:     scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
//       CHECK:       scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
//       CHECK:         %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]])
//       CHECK:         %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]])
//       CHECK:         %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref<?x?xf32>

//       CHECK:         %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref<?x?xf32>
//       CHECK:         %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>

//       CHECK:         %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
//       CHECK:         %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
//       CHECK:         memref.store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>

// CHECKPARALLEL-LABEL: @conv2d_no_symbols
//  CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?xf32>
//  CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?xf32>
//  CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32>
//       CHECKPARALLEL: %[[c0:.*]] = constant 0 : index
//       CHECKPARALLEL: %[[c1:.*]] = constant 1 : index
//       CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?xf32>
//       CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?xf32>
//       CHECKPARALLEL: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?xf32>
//       CHECKPARALLEL: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?xf32>
//       CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]]) = (%[[c0]], %[[c0]]) to (%[[dim2]], %[[dim3]]) step (%[[c1]], %[[c1]]) {
//       CHECKPARALLEL:   scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
//       CHECKPARALLEL:     scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
//       CHECKPARALLEL:       %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]])
//       CHECKPARALLEL:       %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]])
//       CHECKPARALLEL:       %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref<?x?xf32>
//       CHECKPARALLEL:       %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref<?x?xf32>
//       CHECKPARALLEL:       %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>
//       CHECKPARALLEL:       %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
//       CHECKPARALLEL:       %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
//       CHECKPARALLEL:       memref.store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>


// linalg.conv_3d on dynamic 3-D memrefs. The checks expect loops over the three
// output dimensions followed by loops over the three filter dimensions, with
// each input coordinate computed through the #stride1Dilation1 map.
func @conv3d_no_symbols(%in : memref<?x?x?xf32>, %filter : memref<?x?x?xf32>, %out : memref<?x?x?xf32>) -> () {
  linalg.conv_3d ins(%in, %filter : memref<?x?x?xf32>, memref<?x?x?xf32>)
                outs(%out : memref<?x?x?xf32>)
  return
}

// CHECK-LABEL: @conv3d_no_symbols
//  CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//  CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//  CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//       CHECK: %[[c2:.*]] = constant 2 : index
//       CHECK: %[[c0:.*]] = constant 0 : index
//       CHECK: %[[c1:.*]] = constant 1 : index
//       CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
//       CHECK: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
//       CHECK: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
//       CHECK: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?x?xf32>
//       CHECK: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?x?xf32>
//       CHECK: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref<?x?x?xf32>
//       CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
//       CHECK:   scf.for %[[arg4:.*]] = %[[c0]] to %[[dim4]] step %[[c1]] {
//       CHECK:     scf.for %[[arg5:.*]] = %[[c0]] to %[[dim5]] step %[[c1]] {
//       CHECK:       scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
//       CHECK:         scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
//       CHECK:           scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
//       CHECK:             %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]])
//       CHECK:             %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]])
//       CHECK:             %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]])
//       CHECK:             %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref<?x?x?xf32>

//       CHECK:             %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref<?x?x?xf32>
//       CHECK:             %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>

//       CHECK:             %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
//       CHECK:             %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
//       CHECK:             memref.store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>

// CHECKPARALLEL-LABEL: @conv3d_no_symbols
//  CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//  CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//  CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
//       CHECKPARALLEL: %[[c2:.*]] = constant 2 : index
//       CHECKPARALLEL: %[[c0:.*]] = constant 0 : index
//       CHECKPARALLEL: %[[c1:.*]] = constant 1 : index
//       CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
//       CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
//       CHECKPARALLEL: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
//       CHECKPARALLEL: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?x?xf32>
//       CHECKPARALLEL: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?x?xf32>
//       CHECKPARALLEL: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref<?x?x?xf32>
//       CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]]) = (%[[c0]], %[[c0]], %[[c0]]) to (%[[dim3]], %[[dim4]], %[[dim5]]) step (%[[c1]], %[[c1]], %[[c1]]) {
//       CHECKPARALLEL:   scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
//       CHECKPARALLEL:     scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
//       CHECKPARALLEL:       scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
//       CHECKPARALLEL:         %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]])
//       CHECKPARALLEL:         %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]])
//       CHECKPARALLEL:         %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]])
//       CHECKPARALLEL:         %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref<?x?x?xf32>
//       CHECKPARALLEL:         %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref<?x?x?xf32>
//       CHECKPARALLEL:         %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
//       CHECKPARALLEL:         %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
//       CHECKPARALLEL:         %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
//       CHECKPARALLEL:         memref.store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>