// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize=test-analysis-only -split-input-file | FileCheck %s

//===----------------------------------------------------------------------===//
// Simple cases
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_fun
func @extract_slice_fun(%A : tensor<?xf32>, %B : tensor<?xf32> {linalg.inplaceable = true})
  -> (tensor<4xf32>, tensor<8xf32>)
{
  // tensor.extract_slice is not used in a write, so it is not compelled to
  // bufferize out of place. Let callers decide whether they want to create
  // aliasing subviews at all call sites or whether they allocate.
  // This is true irrespective of whether the function argument is inplaceable.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %r1 = tensor.extract_slice %B[0][8][1] : tensor<?xf32> to tensor<8xf32>

  return %r0, %r1: tensor<4xf32>, tensor<8xf32>
}

// -----

// CHECK-LABEL: func @insert_slice_fun
func @insert_slice_fun(
    %A : tensor<?xf32>,
    %B : tensor<?xf32> {linalg.inplaceable = true},
    %C : tensor<4xf32>)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // %A is not inplaceable: this must bufferize out of place.
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // %B is inplaceable: this bufferizes inplace.
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  return %r0, %r1: tensor<?xf32>, tensor<?xf32>
}
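
// For intuition, a sketch of what bufferization might produce for the two
// insert_slice cases above (not checked by this test; buffer names such as
// %bufA/%bufB/%bufC and the elided subview types are hypothetical):
//
//   Out-of-place (%r0): allocate a copy of %A's buffer, then write into it.
//     %c0 = constant 0 : index
//     %d = memref.dim %bufA, %c0 : memref<?xf32>
//     %alloc = memref.alloc(%d) : memref<?xf32>
//     linalg.copy(%bufA, %alloc) : memref<?xf32>, memref<?xf32>
//     %sv0 = memref.subview %alloc[0][4][1] : ...
//     linalg.copy(%bufC, %sv0) : memref<4xf32>, ...
//
//   Inplace (%r1): write directly through a subview of %B's buffer.
//     %sv1 = memref.subview %bufB[0][4][1] : ...
//     linalg.copy(%bufC, %sv1) : memref<4xf32>, ...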

// -----

// CHECK-LABEL: func @conflict_on_B
func @conflict_on_B(
    %A : tensor<4x4xf32> {linalg.inplaceable = true},
    %B : tensor<4x4xf32> {linalg.inplaceable = true})
  -> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>)
{
  // matmul output operand interferes with input operand.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                    outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand interferes with input operand.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                    outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand does not interfere with input operand.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                    outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>
}
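
// For intuition (a sketch, not checked by this test; %bufA/%bufB are
// hypothetical buffer names): had %C bufferized inplace, the matmul would
// have read and written the same buffer, e.g.
//   linalg.matmul ins(%bufA, %bufB : memref<4x4xf32>, memref<4x4xf32>)
//                outs(%bufB : memref<4x4xf32>)
// where every update of the accumulator stored in %bufB clobbers an element
// of the B operand that later iterations still need to read. %E is safe
// because %B only appears as the output operand there.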

//===----------------------------------------------------------------------===//
// Length-1 producer-consumer cases.
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_extract_slice
func @extract_slice_extract_slice(
    %A : tensor<?xf32> {linalg.inplaceable = true}, %B : tensor<?xf32>)
  -> (tensor<2xf32>, tensor<2xf32>)
{
  // tensor.extract_slice is not used in a write, so it is not compelled to
  // bufferize out of place. Let callers decide whether they want to create
  // aliasing subviews at all call sites or whether they allocate.
  // This is true irrespective of whether the function argument is inplaceable.
  // CHECK: {__inplace_results_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: {__inplace_results_attr__ = ["true"]}
  %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>

  // CHECK: {__inplace_results_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: {__inplace_results_attr__ = ["true"]}
  %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>

  return %r1, %r3: tensor<2xf32>, tensor<2xf32>
}
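
// For intuition (a sketch, not checked by this test; %bufA is a hypothetical
// buffer name): inplace extract_slice chains compose into aliasing subviews
// of the original buffer, e.g.
//   %sv0 = memref.subview %bufA[0][4][1] : ... // aliases %bufA
//   %sv1 = memref.subview %sv0[0][2][1] : ...  // aliases %sv0, hence %bufA
// No allocation or copy is needed as long as no conflicting write intervenes.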

// -----

// CHECK-LABEL: func @insert_slice_insert_slice
func @insert_slice_insert_slice(
    %A : tensor<?xf32> {linalg.inplaceable = true},
    %A2 : tensor<4xf32> {linalg.inplaceable = true},
    %A3 : tensor<2xf32> {linalg.inplaceable = true},
    %B : tensor<?xf32>, %B2 : tensor<4xf32>, %B3 : tensor<2xf32>)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK: {__inplace_results_attr__ = ["true"]}
  %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>

  // CHECK: {__inplace_results_attr__ = ["true"]}
  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: {__inplace_results_attr__ = ["false"]}
  %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>

  // CHECK: {__inplace_results_attr__ = ["false"]}
  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice
func @extract_slice_nonmatching_insert_slice(
    %A : tensor<?xf32> {linalg.inplaceable = true},
    %B : tensor<?xf32>, %idx: index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // %r1 bufferizes inplace because %A is inplaceable.
  // %r0 is an overlapping tensor.extract_slice that does not match; it must
  // bufferize out of place.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // %r1 can bufferize inplace fine.
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>

  // %r3 does not bufferize inplace because %B is not inplaceable.
  // %r2 is an overlapping tensor.extract_slice that does not match, but it
  // does not alias with the buffer coming from %r3, so it can actually
  // bufferize inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // %r3 cannot bufferize inplace since %B is not inplaceable.
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>

  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_matching_insert_slice
func @extract_slice_matching_insert_slice(
    %A : tensor<?xf32> {linalg.inplaceable = true},
    %B : tensor<?xf32>)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // %r1 bufferizes inplace because %A is inplaceable.
  // %r0 is a tensor.extract_slice that matches, so it can also be bufferized
  // inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // %r2 is a tensor.extract_slice that matches %r3, so it can be bufferized
  // inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // tensor.insert_slice cannot bufferize inplace since %B is not inplaceable.
  // This case should have been removed by a canonicalization pattern; it would
  // be unproductive to add special logic in bufferization to recognize the
  // matching insert_slice(extract_slice(A), A) pair.
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}
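
// Note on the matching cases above: a matching pair such as
//   %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
//   %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
// is a no-op that folds to %A, so it is expected to disappear during
// canonicalization before bufferization ever sees it.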

// -----

// CHECK-LABEL: func @extract_slice_linalg_readonly_use
func @extract_slice_linalg_readonly_use(
    %A : tensor<?x?xf32>,
    %B : tensor<4x4xf32>,
    %C : tensor<4x4xf32> {linalg.inplaceable = true})
  -> (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // tensor.extract_slice is only used as a read: no interference, irrespective
  // of the users' inplace status.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // matmul output operand is not inplaceable at the function boundary.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                    outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand is inplaceable at the function boundary.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                    outs(%C: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func @extract_slice_to_linalg_write_use(
    %A : tensor<4x4xf32>,
    %B : tensor<?x?xf32>,
    %C : tensor<?x?xf32> {linalg.inplaceable = true})
  -> (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // Step 3. %sB forward propagates to a write in %D, but that write is not
  // inplace. So %sB is only ever read and can bufferize inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 2. %sB has a read interference in %E, so %D does not bufferize
  // inplace.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  %D = linalg.matmul ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
                    outs(%sB: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // Step 4. %sC forward propagates to an inplace write in %E.
  // %sC backward propagates to %C, which is inplaceable.
  // As a consequence this is bufferized inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 1. %sC backprops to the tensor.extract_slice producer, which is not
  // considered an interference. This bufferizes inplace.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
                    outs(%sC: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

//===----------------------------------------------------------------------===//
// Transitive cases
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func @extract_slice_to_linalg_write_use(
    %A : tensor<4x4xf32>,
    %B : tensor<?x?xf32>,
    %C : tensor<?x?xf32> {linalg.inplaceable = true})
  -> (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // Step 4. %sB forward propagates to an inplace write in %D.
  // %sB backward propagates to %B, which is not inplaceable.
  // As a consequence this is bufferized out of place.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 1. %sB backprops to the tensor.extract_slice producer, which is not
  // considered an interference. This bufferizes inplace.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %D = linalg.matmul ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
                    outs(%sB: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // Step 3. %sC forward propagates to an inplace write in %E.
  // %sC backward propagates to %C, which is inplaceable.
  // As a consequence this is bufferized inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 2. %sC backprops to the tensor.extract_slice producer, which is not
  // considered an interference. This bufferizes inplace.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                    outs(%sC: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}
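
// For intuition (a sketch, not checked by this test; %bufA/%bufC and the
// subview layout are hypothetical): the fully inplace path above lowers to a
// subview of %C's buffer that the matmul writes through directly, e.g.
//   %svC = memref.subview %bufC[0, 0][4, 4][1, 1] : ...
//   linalg.matmul ins(%bufA, %bufA : memref<4x4xf32>, memref<4x4xf32>)
//                outs(%svC : ...)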

// -----

// CHECK-LABEL: func @nested_extract_slice_and_insert
func @nested_extract_slice_and_insert(
    %A : tensor<?x?xf32>,
    %B : tensor<?x?xf32> {linalg.inplaceable = true},
    %C : tensor<?x?xf32> {linalg.inplaceable = true},
    %idx : index)
  -> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
{
  %f0 = constant 0.0 : f32

  // 2-level matching tensor.extract_slice / tensor.insert_slice into
  // non-inplaceable %A.
  //   - %rA is not inplaceable because %A is not inplaceable at the function
  //     boundary.
  //   - once %rA is deemed not inplaceable, nothing prevents %rsA from being
  //     inplaceable.
  //   - this propagates to %FA and %ssA being inplaceable.
  //   - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not
  //     inplaceable and so %sA is not inplaceable.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
  %FA = linalg.fill(%f0, %ssA) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
  %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
  %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  // 3-level matching tensor.extract_slice / tensor.insert_slice into
  // inplaceable %B.
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: tensor.extract_slice
  // At the moment, this second tensor.extract_slice fails to bufferize inplace
  // because the clobbering analysis conservatively tests for equivalent
  // buffers.
  // TODO: This is currently too restrictive and misses clobberings.
  // When available, use container-containee analysis.
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
  %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
  %FB = linalg.fill(%f0, %sssB) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
  %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
  %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
  %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  // 2-level matching tensor.extract_slice / tensor.insert_slice into
  // inplaceable %C, with a twist thrown into the system: the %rsC production
  // sizes do not match %ssC.
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // The tensor.insert_slice that would be a candidate for matching does not
  // actually match. That tensor.insert_slice can still be bufferized inplace
  // nonetheless, but this tensor.extract_slice, which bufferizes to an
  // inplace write, cannot.
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssC = tensor.extract_slice %sC[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
  %FC = linalg.fill(%f0, %ssC) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
  %rsC = tensor.insert_slice %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
  %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
}

//===----------------------------------------------------------------------===//
// Simple loop cases
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @scf_for_yield_only
func @scf_for_yield_only(%A : tensor<?xf32>,
                         %B : tensor<?xf32> {linalg.inplaceable = true},
                         %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK: scf.for
  // CHECK-NEXT: scf.yield
  // CHECK-NEXT: {__inplace_results_attr__ = ["false"]}
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    scf.yield %t : tensor<?xf32>
  }

  // CHECK: scf.for
  // CHECK-NEXT: scf.yield
  // CHECK-NEXT: {__inplace_results_attr__ = ["true"]}
  %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor<?xf32>) {
    scf.yield %t : tensor<?xf32>
  }

  return %r0, %r1: tensor<?xf32>, tensor<?xf32>
}
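
// For intuition (a sketch, not checked by this test; %bufA is a hypothetical
// buffer name): the out-of-place loop on %A bufferizes by first copying into
// a fresh buffer that the loop then owns, while the loop on %B runs on %B's
// buffer directly, e.g.
//   %c0 = constant 0 : index
//   %d = memref.dim %bufA, %c0 : memref<?xf32>
//   %alloc = memref.alloc(%d) : memref<?xf32>
//   linalg.copy(%bufA, %alloc) : memref<?xf32>, memref<?xf32>
//   scf.for %i = %lb to %ub step %step {
//     // the loop body reads and writes %alloc only
//   }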

// -----

// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
func @scf_for_with_tensor.insert_slice(%A : tensor<?xf32>,
              %B : tensor<?xf32> {linalg.inplaceable = true},
              %C : tensor<4xf32>,
              %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK: scf.for
  // scf.for bbArgs are always inplaceable as seen from ops inside the body:
  //   1. either the matching tensor is not inplaceable and an alloc occurs,
  //      which makes the bbArg inplaceable;
  //   2. or it is already inplaceable and so is the bbArg.
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
  // CHECK-NEXT: scf.yield
  // CHECK-NEXT: {__inplace_results_attr__ = ["false", "true"]}
  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
      -> (tensor<?xf32>, tensor<?xf32>)
  {
    %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
    %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
    scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
  }

  return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
}

// -----

func private @some_use(tensor<?xf32>) -> ()

// CHECK-LABEL: func @scf_for_deps
func @scf_for_deps(%A : tensor<?xf32> {linalg.inplaceable = true},
                   %B : tensor<?xf32> {linalg.inplaceable = true},
                   %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // %r0 must bufferize out of place because one use of %t in the subsequent
  // production of %r1 is a read.
  // CHECK: scf.for
  // CHECK-NEXT: call
  // CHECK-NEXT: scf.yield
  // CHECK-NEXT: {__inplace_results_attr__ = ["false"]}
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    call @some_use(%t) : (tensor<?xf32>) -> ()
    scf.yield %t : tensor<?xf32>
  }

  // %r1 bufferizes inplace fine.
  // CHECK: scf.for
  // CHECK-NEXT: call
  // CHECK-NEXT: scf.yield
  // CHECK-NEXT: {__inplace_results_attr__ = ["true"]}
  %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    call @some_use(%t) : (tensor<?xf32>) -> ()
    scf.yield %t : tensor<?xf32>
  }

  // %r2 must bufferize out of place because one use of %t in the subsequent
  // production of %r3 is a read.
  // CHECK: linalg.tiled_loop
  // CHECK-NEXT: call
  // CHECK-NEXT: linalg.yield
  // CHECK-NEXT: {__inplace_results_attr__ = ["false"]}
  %r2 = linalg.tiled_loop (%i) = (%lb) to (%ub) step (%step)
        ins()
        outs(%t = %B: tensor<?xf32>) {
    call @some_use(%t) : (tensor<?xf32>) -> ()
    linalg.yield %t : tensor<?xf32>
  }

  // %r3 bufferizes inplace fine.
  // CHECK: linalg.tiled_loop
  // CHECK-NEXT: call
  // CHECK-NEXT: linalg.yield
  // CHECK-NEXT: {__inplace_results_attr__ = ["true"]}
  %r3 = linalg.tiled_loop (%i) = (%lb) to (%ub) step (%step)
        ins()
        outs(%t = %B: tensor<?xf32>) {
    call @some_use(%t) : (tensor<?xf32>) -> ()
    linalg.yield %t : tensor<?xf32>
  }

  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}
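
// For intuition (a sketch, not checked by this test; %bufA is a hypothetical
// buffer name): both scf.for loops above iterate on %A. Had the first loop
// written %A's buffer inplace, the second loop and its call to @some_use
// would no longer observe the original value of %A. The analysis therefore
// privatizes %r0 only:
//   %alloc = memref.alloc(...) : memref<?xf32>
//   linalg.copy(%bufA, %alloc) : memref<?xf32>, memref<?xf32> // for %r0
//   // ... the first loop uses %alloc, the second uses %bufA directly.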
538//===----------------------------------------------------------------------===// 539 540func private @foo(tensor<64xf32>) 541 542// CHECK-LABEL: dependence_through_call 543func @dependence_through_call(%I : tensor<64xf32> {linalg.inplaceable = true}) { 544 %f1 = constant 1.000000e+00 : f32 545 %f2 = constant 2.000000e+00 : f32 546 547 // 2. %B already bufferizes inplace, %A would alias and have a different 548 // value. The calls to `foo` are determined to read conservatively, so %A 549 // cannot bufferize inplace. 550 // CHECK: fill 551 // CHECK-SAME: {__inplace_results_attr__ = ["false"]} 552 %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32> 553 554 // 1. Bufferizes inplace: no alias to %A is yet possible. 555 // CHECK: fill 556 // CHECK-SAME: {__inplace_results_attr__ = ["true"]} 557 %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32> 558 559 call @foo(%A) : (tensor<64xf32>) -> () 560 call @foo(%B) : (tensor<64xf32>) -> () 561 562 return 563} 564 565// ----- 566 567func private @foo(tensor<64xf32>) 568 569func private @bar(%A : tensor<64xf32>) { 570 call @foo(%A) : (tensor<64xf32>) -> () 571 return 572} 573 574func @read_dependence_through_scf_and_call( 575 %I : tensor<64xf32> {linalg.inplaceable = true}, 576 %I2 : tensor<64xf32> {linalg.inplaceable = true}) { 577 %c0 = constant 0 : index 578 %c1 = constant 1 : index 579 %c10 = constant 10 : index 580 %f1 = constant 1.000000e+00 : f32 581 %f2 = constant 2.000000e+00 : f32 582 583 // 5. %B bufferizes inplace, %A would alias and have a different value. 584 // The calls to `foo` are determined to read conservatively, so %A cannot 585 // bufferize inplace. 586 // CHECK: fill 587 // CHECK-SAME: {__inplace_results_attr__ = ["false"]} 588 %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32> 589 590 // 4. Bufferizes inplace: no alias to %A is yet possible. 591 // CHECK: fill 592 // CHECK-SAME: {__inplace_results_attr__ = ["true"]} 593 %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32> 594 595 // 3. Does not read or write, bufferizes inplace. 596 // CHECK: scf.for 597 // CHECK: {__inplace_results_attr__ = ["true", "true"]} 598 %r:2 = scf.for %i = %c0 to %c10 step %c1 iter_args(%0 = %A, %1 = %B) 599 -> (tensor<64xf32>, tensor<64xf32>) 600 { 601 scf.yield %0, %1 : tensor<64xf32>, tensor<64xf32> 602 } 603 call @foo(%r#0) : (tensor<64xf32>) -> () 604 call @foo(%r#1) : (tensor<64xf32>) -> () 605 606 // 2. %B2 already bufferizes inplace, %A2 would alias and have a different 607 // value. The calls to `foo` are determined to read conservatively, so %A2 608 // cannot bufferize inplace. 609 // CHECK: fill 610 // CHECK-SAME: {__inplace_results_attr__ = ["false"]} 611 %A2 = linalg.fill(%f1, %I2) : f32, tensor<64xf32> -> tensor<64xf32> 612 613 // 1. Bufferizes inplace: no alias to %A2 is yet possible. 614 // CHECK: fill 615 // CHECK-SAME: {__inplace_results_attr__ = ["true"]} 616 %B2 = linalg.fill(%f2, %I2) : f32, tensor<64xf32> -> tensor<64xf32> 617 618 call @bar(%A2) : (tensor<64xf32>) -> () 619 call @bar(%B2) : (tensor<64xf32>) -> () 620 return 621} 622