// RUN: mlir-opt -promote-buffers-to-stack -split-input-file %s | FileCheck %s

// This file checks the behavior of the PromoteBuffersToStack pass, which
// converts AllocOps into AllocaOps where possible.

// Test Case:
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// PromoteBuffersToStack expected behavior: It should convert %0 into an
// AllocaOp.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @condBranch
func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  cond_br %arg0, ^bb1, ^bb2
^bb1:
  br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: cond_br {{.*}}
// CHECK: ^bb2
// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
// CHECK: test.copy
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// PromoteBuffersToStack expected behavior:
// Since the alloc has dynamic type, it is not converted into an alloca.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @condBranchDynamicType
func @condBranchDynamicType(
  %arg0: i1,
  %arg1: memref<?xf32>,
  %arg2: memref<?xf32>,
  %arg3: index) {
  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
  br ^bb3(%arg1 : memref<?xf32>)
^bb2(%0: index):
  %1 = alloc(%0) : memref<?xf32>
  test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
  br ^bb3(%1 : memref<?xf32>)
^bb3(%2: memref<?xf32>):
  test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>)
  return
}

// CHECK-NEXT: cond_br
// CHECK: ^bb2
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK: br ^bb3
// CHECK-NEXT: ^bb3(%[[ALLOC0:.*]]:{{.*}})
// CHECK: test.copy(%[[ALLOC0]],
// CHECK-NEXT: return

// -----

// Test Case: Existing AllocOp with no users.
// PromoteBuffersToStack expected behavior: It should convert it to an
// AllocaOp.

// CHECK-LABEL: func @emptyUsesValue
func @emptyUsesValue(%arg0: memref<4xf32>) {
  %0 = alloc() : memref<4xf32>
  return
}
// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0
//   /   \
//  |    bb1 <- Initial position of AllocOp
//   \   /
//    bb2
// PromoteBuffersToStack expected behavior: It should convert it into an
// AllocaOp.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @criticalEdge
func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
  %0 = alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: cond_br {{.*}}
// CHECK: ^bb1
// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
// CHECK: test.copy
// CHECK-NEXT: return

// -----
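
// For reference, the rewrite that all of the positive cases in this file
// check for is a one-to-one replacement of the allocation operation (a
// sketch, for a statically shaped buffer below the stack size limit whose
// lifetime is contained in the function):
//
//   %0 = alloc() : memref<2xf32>    // heap allocation
// is rewritten to
//   %0 = alloca() : memref<2xf32>   // stack allocation, freed on return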

// Test Case:
//    bb0 <- Initial position of AllocOp
//   /   \
//  |    bb1
//   \   /
//    bb2
// PromoteBuffersToStack expected behavior: It converts the alloc into an
// alloca.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @invCriticalEdge
func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
  br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
// CHECK: cond_br
// CHECK: test.copy
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0 <- Initial position of the first AllocOp
//   /   \
//  bb1  bb2
//   \   /
//    bb3 <- Initial position of the second AllocOp
// PromoteBuffersToStack expected behavior: It converts the allocs into
// allocas.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @ifElse
func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  %7 = alloc() : memref<2xf32>
  test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
  test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[ALLOCA1:.*]] = alloca()
// CHECK: test.buffer_based
// CHECK: test.copy(%[[ALLOCA1]]
// CHECK-NEXT: return

// -----

// Test Case: No users for buffer in if-else CFG
//    bb0 <- Initial position of AllocOp
//   /   \
//  bb1  bb2
//   \   /
//    bb3
// PromoteBuffersToStack expected behavior: It converts the alloc into an
// alloca.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @ifElseNoUsers
func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
// CHECK: return

// -----
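
// Note that promotion does not require the buffer to have any users: as in
// @emptyUsesValue and @ifElseNoUsers above, a small, statically sized alloc
// whose result is effectively dead is still rewritten into an alloca;
// removing the dead allocation altogether is presumably left to a later
// cleanup such as canonicalization.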

// Test Case:
//       bb0 <- Initial position of the first AllocOp
//      /   \
//    bb1   bb2
//     |   /   \
//     |  bb3  bb4
//      \  \   /
//       \  \ /
//        bb5 <- Initial position of the second AllocOp
// PromoteBuffersToStack expected behavior: The two allocs should be converted
// into allocas.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @ifElseNested
func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
^bb3(%5: memref<2xf32>):
  br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
^bb4(%6: memref<2xf32>):
  br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
  %9 = alloc() : memref<2xf32>
  test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
  test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[ALLOCA1:.*]] = alloca()
// CHECK: test.buffer_based
// CHECK: test.copy(%[[ALLOCA1]]
// CHECK-NEXT: return

// -----

// Test Case: Dead operations in a single block.
// PromoteBuffersToStack expected behavior: It converts the two AllocOps into
// allocas.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @redundantOperations
func @redundantOperations(%arg0: memref<2xf32>) {
  %0 = alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  %1 = alloc() : memref<2xf32>
  test.buffer_based in(%0: memref<2xf32>) out(%1: memref<2xf32>)
  return
}

// CHECK: (%[[ARG0:.*]]: {{.*}})
// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca()
// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOCA0]]
// CHECK: %[[ALLOCA1:.*]] = alloca()
// CHECK-NEXT: test.buffer_based in(%[[ALLOCA0]]{{.*}} out(%[[ALLOCA1]]
// CHECK: return

// -----

// Test Case:
//                                     bb0
//                                    /   \
// Initial pos of the 1st AllocOp -> bb1  bb2 <- Initial pos of the 2nd AllocOp
//                                    \   /
//                                     bb3
// PromoteBuffersToStack expected behavior: Both AllocOps are converted into
// allocas.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
func @moving_alloc_and_inserting_missing_dealloc(
  %cond: i1,
  %arg0: memref<2xf32>,
  %arg1: memref<2xf32>) {
  cond_br %cond, ^bb1, ^bb2
^bb1:
  %0 = alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  br ^exit(%0 : memref<2xf32>)
^bb2:
  %1 = alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
  br ^exit(%1 : memref<2xf32>)
^exit(%arg2: memref<2xf32>):
  test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: cond_br {{.*}}
// CHECK: ^bb1
// CHECK-NEXT: %{{.*}} = alloca()
// CHECK: ^bb2
// CHECK-NEXT: %{{.*}} = alloca()
// CHECK: test.copy
// CHECK-NEXT: return

// -----
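
// (A note on naming: the function name above is presumably inherited from
// the corresponding buffer-deallocation test case. This pass neither moves
// allocations nor inserts deallocations; as the CHECK lines verify, it only
// rewrites the two allocs in place.)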

// Test Case: Nested regions - This test defines a BufferBasedOp inside the
// region of a RegionBufferBasedOp.
// PromoteBuffersToStack expected behavior: The AllocOp of the outer region
// is converted into an AllocaOp; the AllocOp nested inside the region
// operation is left as a heap allocation.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @nested_regions_and_cond_branch
func @nested_regions_and_cond_branch(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cond_br %arg0, ^bb1, ^bb2
^bb1:
  br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = alloc() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: cond_br {{.*}}
// CHECK: ^bb2
// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca()
// CHECK: ^bb0
// CHECK-NEXT: %[[ALLOC1:.*]] = alloc()

// -----

// Test Case: buffer escaping through the function results.
// PromoteBuffersToStack expected behavior: The first alloc is returned and
// therefore must not be converted. The second alloc is converted, since it
// remains local to the function.

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @memref_in_function_results
func @memref_in_function_results(
  %arg0: memref<5xf32>,
  %arg1: memref<10xf32>,
  %arg2: memref<5xf32>) -> (memref<10xf32>, memref<15xf32>) {
  %x = alloc() : memref<15xf32>
  %y = alloc() : memref<5xf32>
  test.buffer_based in(%arg0: memref<5xf32>) out(%y: memref<5xf32>)
  test.copy(%y, %arg2) : (memref<5xf32>, memref<5xf32>)
  return %arg1, %x : memref<10xf32>, memref<15xf32>
}
// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>,
// CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>)
// CHECK: %[[ALLOC:.*]] = alloc()
// CHECK: %[[ALLOCA:.*]] = alloca()
// CHECK: test.copy
// CHECK: return %[[ARG1]], %[[ALLOC]]

// -----

// Test Case: nested region control flow
// The allocation in the nested if branch cannot be converted to an alloca
// due to its dynamic size; the outer allocation escapes through the
// returned value and is kept as well.

// CHECK-LABEL: func @nested_region_control_flow
func @nested_region_control_flow(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = cmpi "eq", %arg0, %arg1 : index
  %1 = alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = alloc(%arg0, %arg1) : memref<?x?xf32>
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: %[[ALLOC2:.*]] = alloc(%arg0, %arg1)
// CHECK-NEXT: scf.yield %[[ALLOC0]]
// CHECK: return %[[ALLOC1]]

// -----
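
// A note on how escape analysis is exercised by the surrounding cases: the
// pass follows the aliases of a buffer, so an alloc that is forwarded as a
// region argument, yielded, and finally returned (as in the next test) is
// treated as escaping and must stay on the heap, since an alloca cannot
// outlive its function frame.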

// Test Case: nested region control flow within a region interface.
// The alloc %0 is not converted in this case, since the allocation escapes
// the function through the returned value %1.

// CHECK-LABEL: func @inner_region_control_flow
func @inner_region_control_flow(%arg0 : index) -> memref<2x2xf32> {
  %0 = alloc() : memref<2x2xf32>
  %1 = test.region_if %0 : memref<2x2xf32> -> (memref<2x2xf32>) then {
  ^bb0(%arg1 : memref<2x2xf32>):
    test.region_if_yield %arg1 : memref<2x2xf32>
  } else {
  ^bb0(%arg1 : memref<2x2xf32>):
    test.region_if_yield %arg1 : memref<2x2xf32>
  } join {
  ^bb0(%arg1 : memref<2x2xf32>):
    test.region_if_yield %arg1 : memref<2x2xf32>
  }
  return %1 : memref<2x2xf32>
}

// CHECK: %[[ALLOC0:.*]] = alloc()
// CHECK-NEXT: %[[ALLOC1:.*]] = test.region_if
// CHECK-NEXT: ^bb0(%[[ALLOC2:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC2]]
// CHECK: ^bb0(%[[ALLOC3:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC3]]
// CHECK: ^bb0(%[[ALLOC4:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC4]]
// CHECK: return %[[ALLOC1]]

// -----

// Test Case: structured control-flow loop using a nested alloc.
// Alloc %0 is converted to an alloca. %3 is not converted since it resides
// inside a loop: promoting it would allocate new stack space on every
// iteration.

// CHECK-LABEL: func @loop_alloc
func @loop_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = cmpi "eq", %i, %ub : index
    %3 = alloc() : memref<2xf32>
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
// CHECK-NEXT: scf.for
// CHECK: %[[ALLOC:.*]] = alloc()

// -----

// Test Case: structured control-flow loop with a nested if operation.
// The loop yields buffers that have been defined outside of the loop and the
// backedges only use the iteration arguments (or one of their aliases).
// Therefore, we do not have to (and are not allowed to) free any buffers
// that are passed via the backedges. The alloc is converted to an AllocaOp.

// CHECK-LABEL: func @loop_nested_if_no_alloc
func @loop_nested_if_no_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = cmpi "eq", %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      scf.yield %0 : memref<2xf32>
    } else {
      scf.yield %iterBuf : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: %[[ALLOCA0:.*]] = alloca()
// CHECK-NEXT: %[[ALLOCA1:.*]] = scf.for {{.*}} iter_args(%[[IALLOCA:.*]] =
// CHECK: %[[ALLOCA2:.*]] = scf.if
// CHECK: scf.yield %[[ALLOCA0]]
// CHECK: scf.yield %[[IALLOCA]]
// CHECK: scf.yield %[[ALLOCA2]]
// CHECK: test.copy(%[[ALLOCA1]], %arg4)

// -----
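
// In the case above the alloc can be promoted because every buffer passed
// along the loop backedge aliases either the iteration argument or a value
// defined outside of the loop, so the stack allocation never has to outlive
// the enclosing function frame. The next case shows the opposite situation.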

// Test Case: structured control-flow loop with a nested if operation using
// a deeply nested buffer allocation.
// Neither alloc is converted: both buffers are forwarded through the if and
// loop yields into the returned value, and %4 is additionally allocated
// inside the loop.

// CHECK-LABEL: func @loop_nested_if_alloc
func @loop_nested_if_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>) -> memref<2xf32> {
  %0 = alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = cmpi "eq", %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      %4 = alloc() : memref<2xf32>
      scf.yield %4 : memref<2xf32>
    } else {
      scf.yield %0 : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  return %1 : memref<2xf32>
}

// CHECK: %[[ALLOC0:.*]] = alloc()
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}}
// CHECK: %[[ALLOC2:.*]] = scf.if
// CHECK: %[[ALLOC3:.*]] = alloc()
// CHECK-NEXT: scf.yield %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: scf.yield %[[ALLOC2]]
// CHECK: return %[[ALLOC1]]

// -----

// Test Case: The allocated buffer is too large and, hence, it is not
// converted. The maximum size in the actual implementation is 1 KB by
// default.

// CHECK-LABEL: func @large_buffer_allocation
func @large_buffer_allocation(%arg0: memref<2048xf32>) {
  %0 = alloc() : memref<2048xf32>
  test.copy(%0, %arg0) : (memref<2048xf32>, memref<2048xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC:.*]] = alloc()
// CHECK-NEXT: test.copy
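
// For the case above, the size computation blocking the promotion is:
// 2048 elements x 4 bytes per f32 = 8192 bytes, well above the default
// 1 KB limit. A hypothetical invocation raising the limit enough to promote
// this buffer (assuming the option spelling max-alloc-size-in-bytes) would
// be:
//
//   mlir-opt -promote-buffers-to-stack="max-alloc-size-in-bytes=16384" %s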