1// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize=test-analysis-only -split-input-file | FileCheck %s
2
3//===----------------------------------------------------------------------===//
4// Simple cases
5//===----------------------------------------------------------------------===//
6
7// -----
8
9// CHECK-LABEL: func @extract_slice_fun
10func @extract_slice_fun(%A : tensor<?xf32>, %B : tensor<?xf32> {linalg.inplaceable = true})
11  -> (tensor<4xf32>, tensor<8xf32>)
12{
13  // tensor.extract_slice is not used in a write, so it is not compelled to
14  // bufferize out of place. Let callers decide whether they want to create
15  // aliasing subviews at all call sites or whether they allocate.
16  // This is true irrespective of whether the function argument is inplaceable.
17  //     CHECK: tensor.extract_slice
18  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
19  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
20
21  //     CHECK: tensor.extract_slice
22  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
23  %r1 = tensor.extract_slice %B[0][8][1] : tensor<?xf32> to tensor<8xf32>
24
25  return %r0, %r1: tensor<4xf32>, tensor<8xf32>
26}
27
28// -----
29
30// CHECK-LABEL: func @insert_slice_fun
31func @insert_slice_fun(
32    %A : tensor<?xf32>,
33    %B : tensor<?xf32> {linalg.inplaceable = true},
34    %C : tensor<4xf32>)
35  -> (tensor<?xf32>, tensor<?xf32>)
36{
37  // Writing %C into non-inplaceable %A must bufferize out of place.
38  //     CHECK: tensor.insert_slice
39  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
40  %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
41
42  // Writing %C into inplaceable %B bufferizes inplace.
43  //     CHECK: tensor.insert_slice
44  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
45  %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
46
47  return %r0, %r1: tensor<?xf32>, tensor<?xf32>
48}
49
50// -----
51
52// CHECK-LABEL: func @conflict_on_B
53func @conflict_on_B(
54    %A : tensor<4x4xf32> {linalg.inplaceable = true},
55    %B : tensor<4x4xf32> {linalg.inplaceable = true})
56  -> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>)
57{
58  // matmul output operand interferes with input operand: %B is both read and written.
59  //     CHECK: linalg.matmul
60  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
61  %C = linalg.matmul  ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
62                     outs(%B: tensor<4x4xf32>)
63    -> tensor<4x4xf32>
64
65  // matmul output operand interferes with input operand: %B is both read and written.
66  //     CHECK: linalg.matmul
67  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
68  %D = linalg.matmul  ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>)
69                     outs(%B: tensor<4x4xf32>)
70    -> tensor<4x4xf32>
71
72  // matmul output operand does not interfere with the input operands (both inputs are %A).
73  //     CHECK: linalg.matmul
74  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
75  %E = linalg.matmul  ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
76                     outs(%B: tensor<4x4xf32>)
77    -> tensor<4x4xf32>
78
79  return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>
80}
81
82//===----------------------------------------------------------------------===//
83// Length-1 producer-consumer cases.
84//===----------------------------------------------------------------------===//
85
86// -----
87
88// CHECK-LABEL: func @extract_slice_extract_slice
89func @extract_slice_extract_slice(
90    %A : tensor<?xf32> {linalg.inplaceable = true}, %B : tensor<?xf32>)
91  -> (tensor<2xf32>, tensor<2xf32>)
92{
93  // tensor.extract_slice is not used in a write, so it is not compelled to
94  // bufferize out of place. Let callers decide whether they want to create
95  // aliasing subviews at all call sites or whether they allocate.
96  // This is true irrespective of whether the function argument is inplaceable.
97  // CHECK: {__inplace_results_attr__ = ["true"]}
98  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
99
100  // CHECK: {__inplace_results_attr__ = ["true"]}
101  %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>
102
103  // CHECK: {__inplace_results_attr__ = ["true"]}
104  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
105
106  // CHECK: {__inplace_results_attr__ = ["true"]}
107  %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>
108
109  return %r1, %r3: tensor<2xf32>, tensor<2xf32>
110}
111
112// -----
113
114// CHECK-LABEL: func @insert_slice_insert_slice
115func @insert_slice_insert_slice(
116    %A : tensor<?xf32> {linalg.inplaceable = true},
117    %A2 : tensor<4xf32> {linalg.inplaceable = true},
118    %A3 : tensor<2xf32> {linalg.inplaceable = true},
119    %B : tensor<?xf32>, %B2 : tensor<4xf32>, %B3 : tensor<2xf32>)
120  -> (tensor<?xf32>, tensor<?xf32>)
121{
  // Chained insert_slices into inplaceable destinations (%A2 then %A):
  // both writes can happen inplace.
122  // CHECK: {__inplace_results_attr__ = ["true"]}
123  %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>
124
125  // CHECK: {__inplace_results_attr__ = ["true"]}
126  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
127
  // The same chain into non-inplaceable destinations (%B2 then %B) must
  // bufferize out of place.
128  // CHECK: {__inplace_results_attr__ = ["false"]}
129  %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>
130
131  // CHECK: {__inplace_results_attr__ = ["false"]}
132  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
133
134  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
135}
136
137// -----
138
139// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice
140func @extract_slice_nonmatching_insert_slice(
141    %A : tensor<?xf32> {linalg.inplaceable = true},
142    %B : tensor<?xf32>, %idx: index)
143  -> (tensor<?xf32>, tensor<?xf32>)
144{
145  // %r1 bufferizes inplace because %A is inplaceable.
146  // %r0 is an overlapping tensor.extract_slice that does not match, so it must
147  // bufferize out of place.
148  //      CHECK: tensor.extract_slice
149  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
150  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
151
152  // %r1 can bufferize inplace fine.
153  //      CHECK: tensor.insert_slice
154  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
155  %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
156
157  // %r3 does not bufferize inplace because %B is not inplaceable.
158  // %r2 is an overlapping tensor.extract_slice that does not match, but does
159  // not alias with the buffer coming from %r3 so it can actually bufferize
160  // inplace.
161  //      CHECK: tensor.extract_slice
162  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
163  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
164
165  // %r3 cannot bufferize inplace since %B is not inplaceable.
166  //      CHECK: tensor.insert_slice
167  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
168  %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
169
170  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
171}
172
173// -----
174
175// CHECK-LABEL: func @extract_slice_matching_insert_slice
176func @extract_slice_matching_insert_slice(
177    %A : tensor<?xf32> {linalg.inplaceable = true},
178    %B : tensor<?xf32>)
179  -> (tensor<?xf32>, tensor<?xf32>)
180{
181  // %r1 bufferizes inplace because %A is inplaceable.
182  // %r0 is a tensor.extract_slice that matches, so it can also be bufferized
183  // inplace.
184  //      CHECK: tensor.extract_slice
185  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
186  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
187
188  //      CHECK: tensor.insert_slice
189  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
190  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
191
192  // %r2 is a tensor.extract_slice that matches %r3, it can be bufferized
193  // inplace.
194  //      CHECK: tensor.extract_slice
195  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
196  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
197
198  // tensor.insert_slice into non-inplaceable %B cannot bufferize inplace.
199  // This should have been captured by a canonicalization pattern and it would
200  // be unproductive to have special logic in bufferization to encode matching
201  // insert_slice(extract_slice(A), A).
202  //      CHECK: tensor.insert_slice
203  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
204  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
205
206  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
207}
208
209// -----
210
211// CHECK-LABEL: func @extract_slice_linalg_readonly_use
212func @extract_slice_linalg_readonly_use(
213    %A : tensor<?x?xf32>,
214    %B : tensor<4x4xf32>,
215    %C : tensor<4x4xf32> {linalg.inplaceable = true})
216  ->  (tensor<4x4xf32>, tensor<4x4xf32>)
217{
218  // tensor.extract_slice is only used as a read, so there is no interference
219  // irrespective of the user's inplace status.
220  //     CHECK: tensor.extract_slice
221  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
222  %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
223
224  // matmul output operand is not inplaceable at the function boundary.
225  //     CHECK: linalg.matmul
226  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
227  %D = linalg.matmul  ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
228                     outs(%B: tensor<4x4xf32>)
229    -> tensor<4x4xf32>
230
231  // matmul output operand is inplaceable at the function boundary.
232  //     CHECK: linalg.matmul
233  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
234  %E = linalg.matmul  ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
235                     outs(%C: tensor<4x4xf32>)
236    -> tensor<4x4xf32>
237
238  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
239}
240
241// -----
242
243// CHECK-LABEL: func @extract_slice_to_linalg_write_use
244func @extract_slice_to_linalg_write_use(
245    %A : tensor<4x4xf32>,
246    %B : tensor<?x?xf32>,
247    %C : tensor<?x?xf32> {linalg.inplaceable = true})
248  ->  (tensor<4x4xf32>, tensor<4x4xf32>)
249{
250  // Step 3. %sB forward propagates to a write in %D but %D is not inplace.
251  // So %sB is only ever read and can bufferize inplace.
252  //     CHECK: tensor.extract_slice
253  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
254  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
255
256  // Step 2. %sB has a read interference in %E, so %D does not bufferize inplace.
257  //     CHECK: linalg.matmul
258  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
259  %D = linalg.matmul  ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
260                     outs(%sB: tensor<4x4xf32>)
261    -> tensor<4x4xf32>
262
263  // Step 4. %sC forward propagates to an inplace write in %E.
264  // %sC backward propagates to %C which is inplaceable.
265  // As a consequence this is bufferized inplace.
266  //     CHECK: tensor.extract_slice
267  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
268  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
269
270  // Step 1. %sC backprops to the tensor.extract_slice producer which is not
271  // considered an interference. This bufferizes inplace.
272  //     CHECK: linalg.matmul
273  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
274  %E = linalg.matmul  ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
275                     outs(%sC: tensor<4x4xf32>)
276    -> tensor<4x4xf32>
277
278  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
279}
280
281//===----------------------------------------------------------------------===//
282// Transitive cases
283//===----------------------------------------------------------------------===//
284
285// -----
286
287// CHECK-LABEL: func @extract_slice_to_linalg_write_use
288func @extract_slice_to_linalg_write_use(
289    %A : tensor<4x4xf32>,
290    %B : tensor<?x?xf32>,
291    %C : tensor<?x?xf32> {linalg.inplaceable = true})
292  ->  (tensor<4x4xf32>, tensor<4x4xf32>)
293{
294  // Step 4. %sB forward propagates to an inplace write in %D.
295  // %sB backward propagates to %B which is not inplaceable.
296  // As a consequence this is bufferized out of place.
297  //     CHECK: tensor.extract_slice
298  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
299  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
300
301  // Step 2. %sB backprops to the tensor.extract_slice producer which is not
302  // considered an interference. This bufferizes inplace.
303  //     CHECK: linalg.matmul
304  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
305  %D = linalg.matmul  ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
306                     outs(%sB: tensor<4x4xf32>)
307    -> tensor<4x4xf32>
308
309  // Step 3. %sC forward propagates to an inplace write in %E.
310  // %sC backward propagates to %C which is inplaceable.
311  // As a consequence this is bufferized inplace.
312  //     CHECK: tensor.extract_slice
313  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
314  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
315
316  // Step 1. %sC backprops to the tensor.extract_slice producer which is not
317  // considered an interference. This bufferizes inplace.
318  //     CHECK: linalg.matmul
319  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
320  %E = linalg.matmul  ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
321                     outs(%sC: tensor<4x4xf32>)
322    -> tensor<4x4xf32>
323
324  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
325}
326
327// -----
328
329// CHECK-LABEL: func @nested_extract_slice_and_insert
330func @nested_extract_slice_and_insert(
331    %A : tensor<?x?xf32>,
332    %B : tensor<?x?xf32> {linalg.inplaceable = true},
333    %C : tensor<?x?xf32> {linalg.inplaceable = true},
334    %idx : index)
335  ->  (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
336{
337  %f0 = constant 0.0 : f32
338
339  // 2-level matching tensor.extract_slice / tensor.insert_slice into non
340  // inplaceable %A.
341  //   - %rA is not inplaceable because %A is not inplaceable at function boundary.
342  //   - once %rA is deemed not inplaceable, nothing prevents %rsA from being inplaceable
343  //   - this propagates to %FA and %ssA being inplaceable.
344  //   - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not
345  //     inplaceable and so %sA is not inplaceable.
346  //     CHECK: tensor.extract_slice
347  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
348  // CHECK-NEXT: tensor.extract_slice
349  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
350  // CHECK-NEXT: fill
351  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
352  // CHECK-NEXT: tensor.insert_slice
353  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
354  // CHECK-NEXT: tensor.insert_slice
355  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
356  %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
357  %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
358  %FA = linalg.fill(%f0, %ssA) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
359  %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
360  %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
361
362  // 3-level matching tensor.extract_slice / tensor.insert_slice into
363  // inplaceable %B.
364  // CHECK-NEXT: tensor.extract_slice
365  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
366  // CHECK-NEXT: tensor.extract_slice
367  // At the moment, this 2nd tensor.extract_slice fails to bufferize inplace because
368  // the clobbering analysis conservatively tests for equivalent buffers.
369  // TODO: This is currently too restrictive and misses clobberings.
370  // When available, use container-containee analysis.
371  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
372  // CHECK-NEXT: tensor.extract_slice
373  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
374  // CHECK-NEXT: fill
375  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
376  // CHECK-NEXT: tensor.insert_slice
377  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
378  // CHECK-NEXT: tensor.insert_slice
379  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
380  // CHECK-NEXT: tensor.insert_slice
381  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
382  %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
383  %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
384  %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
385  %FB = linalg.fill(%f0, %sssB) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
386  %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
387  %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
388  %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
389
390  // 2-level matching tensor.extract_slice / tensor.insert_slice into
391  // inplaceable %C with a twist.
392  // Throw a wrench in the system: %rsC production sizes do not match %ssC.
393  // CHECK-NEXT: tensor.extract_slice
394  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
395  // The tensor.insert_slice that would be a candidate for matching does not actually
396  // match. That tensor.insert_slice can still be bufferized inplace nonetheless
397  // but this tensor.extract_slice, which bufferizes to an inplace write, cannot.
398  // CHECK-NEXT: tensor.extract_slice
399  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
400  // CHECK-NEXT: fill
401  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
402  // CHECK-NEXT: tensor.insert_slice
403  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
404  // CHECK-NEXT: tensor.insert_slice
405  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
406  %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
407  %ssC = tensor.extract_slice %sC[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
408  %FC = linalg.fill(%f0, %ssC) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
409  %rsC = tensor.insert_slice %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
410  %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
411
412  return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
413}
414
415//===----------------------------------------------------------------------===//
416// Simple loop cases
417//===----------------------------------------------------------------------===//
418
419// -----
420
421// CHECK-LABEL: func @scf_for_yield_only
422func @scf_for_yield_only(%A : tensor<?xf32>,
423                         %B : tensor<?xf32> {linalg.inplaceable = true},
424                         %lb : index, %ub : index, %step : index)
425  -> (tensor<?xf32>, tensor<?xf32>)
426{
  // The loop merely forwards %A; since %A is not inplaceable at the function
  // boundary, the yielded result must bufferize out of place.
427  //      CHECK: scf.for
428  // CHECK-NEXT: scf.yield
429  // CHECK-NEXT: {__inplace_results_attr__ = ["false"]}
430  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
431    scf.yield %t : tensor<?xf32>
432  }
433
  // Forwarding inplaceable %B can bufferize inplace.
434  //      CHECK: scf.for
435  // CHECK-NEXT: scf.yield
436  // CHECK-NEXT: {__inplace_results_attr__ = ["true"]}
437  %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor<?xf32>) {
438    scf.yield %t : tensor<?xf32>
439  }
440
441  return %r0, %r1: tensor<?xf32>, tensor<?xf32>
442}
443
444// -----
445
446// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
447func @scf_for_with_tensor.insert_slice(%A : tensor<?xf32>,
448              %B : tensor<?xf32> {linalg.inplaceable = true},
449              %C : tensor<4xf32>,
450              %lb : index, %ub : index, %step : index)
451  -> (tensor<?xf32>, tensor<?xf32>)
452{
453  //      CHECK: scf.for
454  // scf.for bbArgs are always inplaceable seen from ops inside the body:
455  //   1. Either the matching tensor is not inplaceable and an alloc occurs
456  //      which makes bbArg inplaceable.
457  //   2. Or it is already inplaceable and so is bbArg.
  // The yield results reflect the function-boundary constraints: %A is not
  // inplaceable (result 0 is "false") while %B is (result 1 is "true").
458  // CHECK-NEXT:   tensor.insert_slice
459  // CHECK-SAME:     {__inplace_results_attr__ = ["true"]}
460  // CHECK-NEXT:   tensor.insert_slice
461  // CHECK-SAME:     {__inplace_results_attr__ = ["true"]}
462  // CHECK-NEXT:   scf.yield
463  // CHECK-NEXT: {__inplace_results_attr__ = ["false", "true"]}
464  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
465      -> (tensor<?xf32>, tensor<?xf32>)
466  {
467    %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
468    %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
469    scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
470  }
471
472  return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
473}
474
475// -----
476
477func private @some_use(tensor<?xf32>) -> ()
478
479// CHECK-LABEL: func @scf_for_deps
480func @scf_for_deps(%A : tensor<?xf32> {linalg.inplaceable = true},
481                   %B : tensor<?xf32> {linalg.inplaceable = true},
482                   %lb : index, %ub : index, %step : index)
483  -> (tensor<?xf32>, tensor<?xf32>)
484{
485  // %r0 must be out of place because %t aliases %A and %A is read again by
486  // the subsequent loop that produces %r1.
487  //      CHECK: scf.for
488  // CHECK-NEXT: call
489  // CHECK-NEXT: scf.yield
490  // CHECK-NEXT: {__inplace_results_attr__ = ["false"]}
491  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
492    call @some_use(%t) : (tensor<?xf32>) -> ()
493    scf.yield %t : tensor<?xf32>
494  }
495
496  // %r1 bufferizes inplace fine: this is the last use of %A.
497  //      CHECK: scf.for
498  // CHECK-NEXT: call
499  // CHECK-NEXT: scf.yield
500  // CHECK-NEXT: {__inplace_results_attr__ = ["true"]}
501  %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
502    call @some_use(%t) : (tensor<?xf32>) -> ()
503    scf.yield %t : tensor<?xf32>
504  }
505
506  // %r2 must be out of place because %t aliases %B and %B is read again by
507  // the subsequent loop that produces %r3.
508  //      CHECK: linalg.tiled_loop
509  // CHECK-NEXT: call
510  // CHECK-NEXT: linalg.yield
511  // CHECK-NEXT: {__inplace_results_attr__ = ["false"]}
512  %r2 = linalg.tiled_loop (%i) = (%lb) to (%ub) step (%step)
513        ins()
514        outs(%t = %B: tensor<?xf32>) {
515    call @some_use(%t) : (tensor<?xf32>) -> ()
516    linalg.yield %t : tensor<?xf32>
517  }
518
519  // %r3 bufferizes inplace fine: this is the last use of %B.
520  //      CHECK: linalg.tiled_loop
521  // CHECK-NEXT: call
522  // CHECK-NEXT: linalg.yield
523  // CHECK-NEXT: {__inplace_results_attr__ = ["true"]}
524  %r3 = linalg.tiled_loop (%i) = (%lb) to (%ub) step (%step)
525        ins()
526        outs(%t = %B: tensor<?xf32>) {
527    call @some_use(%t) : (tensor<?xf32>) -> ()
528    linalg.yield %t : tensor<?xf32>
529  }
530
531  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
532}
533
534// -----
535
536//===----------------------------------------------------------------------===//
537// Cross function boundary cases.
538//===----------------------------------------------------------------------===//
539
540func private @foo(tensor<64xf32>)
541
542// CHECK-LABEL: dependence_through_call
543func @dependence_through_call(%I : tensor<64xf32> {linalg.inplaceable = true}) {
544  %f1 = constant 1.000000e+00 : f32
545  %f2 = constant 2.000000e+00 : f32
546
  // Note: the step numbers below follow the order in which the analysis
  // visits ops (here, later ops first).
547  // 2. %B already bufferizes inplace, %A would alias and have a different
548  // value. The calls to `foo` are determined to read conservatively, so %A
549  // cannot bufferize inplace.
550  //     CHECK: fill
551  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
552  %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32>
553
554  // 1. Bufferizes inplace: no alias to %A is yet possible.
555  //     CHECK: fill
556  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
557  %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32>
558
559  call @foo(%A) : (tensor<64xf32>) -> ()
560  call @foo(%B) : (tensor<64xf32>) -> ()
561
562  return
563}
564
565// -----
566
567func private @foo(tensor<64xf32>)
568
569func private @bar(%A : tensor<64xf32>) {
  // Forwards %A to @foo; since @foo is only a declaration, calls through @bar
  // are conservatively assumed to read %A.
570  call @foo(%A) : (tensor<64xf32>) -> ()
571  return
572}
573
574func @read_dependence_through_scf_and_call(
575    %I : tensor<64xf32> {linalg.inplaceable = true},
576    %I2 : tensor<64xf32> {linalg.inplaceable = true}) {
577  %c0 = constant 0 : index
578  %c1 = constant 1 : index
579  %c10 = constant 10 : index
580  %f1 = constant 1.000000e+00 : f32
581  %f2 = constant 2.000000e+00 : f32
582
583  // 5. %B bufferizes inplace, %A would alias and have a different value.
584  // The calls to `foo` are determined to read conservatively, so %A cannot
585  // bufferize inplace.
586  //     CHECK: fill
587  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
588  %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32>
589
590  // 4. Bufferizes inplace: no alias to %A is yet possible.
591  //     CHECK: fill
592  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
593  %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32>
594
595  // 3. The loop only forwards its iter_args without reading or writing them,
  // so both results bufferize inplace.
596  //     CHECK: scf.for
597  //     CHECK: {__inplace_results_attr__ = ["true", "true"]}
598  %r:2 = scf.for %i = %c0 to %c10 step %c1 iter_args(%0 = %A, %1 = %B)
599    -> (tensor<64xf32>, tensor<64xf32>)
600  {
601    scf.yield %0, %1 : tensor<64xf32>, tensor<64xf32>
602  }
603  call @foo(%r#0) : (tensor<64xf32>) -> ()
604  call @foo(%r#1) : (tensor<64xf32>) -> ()
605
606  // 2. %B2 already bufferizes inplace, %A2 would alias and have a different
607  // value. The calls to `foo` are determined to read conservatively, so %A2
608  // cannot bufferize inplace.
609  //     CHECK: fill
610  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
611  %A2 = linalg.fill(%f1, %I2) : f32, tensor<64xf32> -> tensor<64xf32>
612
613  // 1. Bufferizes inplace: no alias to %A2 is yet possible.
614  //     CHECK: fill
615  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
616  %B2 = linalg.fill(%f2, %I2) : f32, tensor<64xf32> -> tensor<64xf32>
617
618  call @bar(%A2) : (tensor<64xf32>) -> ()
619  call @bar(%B2) : (tensor<64xf32>) -> ()
620  return
621}
622