//===-- Passes.td - Transforms pass definition file --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains definitions for passes within the Transforms/ directory.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_TRANSFORMS_PASSES
#define MLIR_TRANSFORMS_PASSES

include "mlir/Pass/PassBase.td"
include "mlir/Rewrite/PassUtil.td"

def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> {
  let summary = "Fuse affine loop nests";
  let description = [{
    This pass performs fusion of loop nests using a slicing-based approach. It
    combines two fusion strategies: producer-consumer fusion and sibling fusion.
    Producer-consumer fusion is aimed at fusing pairs of loops where the first
    one writes to a memref that the second reads. Sibling fusion targets pairs
    of loops that share no dependences between them but that load from the same
    memref. The fused loop nests, when possible, are rewritten to access
    significantly smaller local buffers instead of the original memref's, and
    the latter are often either completely optimized away or contracted. This
    transformation leads to enhanced locality and lower memory footprint through
    the elimination or contraction of temporaries/intermediate memref's. These
    benefits are sometimes achieved at the expense of redundant computation
    through a cost model that evaluates available choices such as the depth at
    which a source slice should be materialized in the destination slice.

    Example 1: Producer-consumer fusion.
    Input:
    ```mlir
    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
      %0 = alloc() : memref<10xf32>
      %1 = alloc() : memref<10xf32>
      %cst = constant 0.000000e+00 : f32
      affine.for %arg2 = 0 to 10 {
        affine.store %cst, %0[%arg2] : memref<10xf32>
        affine.store %cst, %1[%arg2] : memref<10xf32>
      }
      affine.for %arg2 = 0 to 10 {
        %2 = affine.load %0[%arg2] : memref<10xf32>
        %3 = addf %2, %2 : f32
        affine.store %3, %arg0[%arg2] : memref<10xf32>
      }
      affine.for %arg2 = 0 to 10 {
        %2 = affine.load %1[%arg2] : memref<10xf32>
        %3 = mulf %2, %2 : f32
        affine.store %3, %arg1[%arg2] : memref<10xf32>
      }
      return
    }
    ```
    Output:
    ```mlir
    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
      %0 = alloc() : memref<1xf32>
      %1 = alloc() : memref<1xf32>
      %cst = constant 0.000000e+00 : f32
      affine.for %arg2 = 0 to 10 {
        affine.store %cst, %0[0] : memref<1xf32>
        affine.store %cst, %1[0] : memref<1xf32>
        %2 = affine.load %1[0] : memref<1xf32>
        %3 = mulf %2, %2 : f32
        affine.store %3, %arg1[%arg2] : memref<10xf32>
        %4 = affine.load %0[0] : memref<1xf32>
        %5 = addf %4, %4 : f32
        affine.store %5, %arg0[%arg2] : memref<10xf32>
      }
      return
    }
    ```

    Example 2: Sibling fusion.
    Input:
    ```mlir
    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
                         %arg4: memref<10x10xf32>) {
      affine.for %arg5 = 0 to 3 {
        affine.for %arg6 = 0 to 3 {
          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
          %2 = mulf %0, %1 : f32
          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
        }
      }
      affine.for %arg5 = 0 to 3 {
        affine.for %arg6 = 0 to 3 {
          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
          %1 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
          %2 = addf %0, %1 : f32
          affine.store %2, %arg4[%arg5, %arg6] : memref<10x10xf32>
        }
      }
      return
    }
    ```
    Output:
    ```mlir
    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
                         %arg4: memref<10x10xf32>) {
      affine.for %arg5 = 0 to 3 {
        affine.for %arg6 = 0 to 3 {
          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
          %2 = mulf %0, %1 : f32
          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
          %3 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
          %4 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
          %5 = addf %3, %4 : f32
          affine.store %5, %arg4[%arg5, %arg6] : memref<10x10xf32>
        }
      }
      return
    }
    ```
  }];
  let constructor = "mlir::createLoopFusionPass()";
  let options = [
    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
           /*default=*/"0.30f", "Fractional increase in additional computation "
                                "tolerated while fusing">,
    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
           /*default=*/"0",
           "Faster memory space number to promote fusion buffers to">,
    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
                            "to fast memory space">,
    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
           "Enables maximal loop fusion">,
  ];
  let dependentDialects = ["memref::MemRefDialect"];
}

def AffinePipelineDataTransfer
    : FunctionPass<"affine-pipeline-data-transfer"> {
  let summary = "Pipeline non-blocking data transfers between explicitly "
                "managed levels of the memory hierarchy";
  let description = [{
    This pass performs a transformation to overlap non-blocking DMA operations
    in a loop with computations through double buffering. This is achieved by
    advancing dma_start operations with respect to other operations.

    Input

    ```mlir
    func @pipelinedatatransfer() {
      %0 = alloc() : memref<256xf32>
      %1 = alloc() : memref<32xf32, 1>
      %2 = alloc() : memref<1xf32>
      %c0 = constant 0 : index
      %c128 = constant 128 : index
      affine.for %i0 = 0 to 8 {
        affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
        affine.dma_wait %2[%c0], %c128 : memref<1xf32>
        %3 = affine.load %1[%i0] : memref<32xf32, 1>
        %4 = "compute"(%3) : (f32) -> f32
        affine.store %4, %1[%i0] : memref<32xf32, 1>
      }
      return
    }
    ```

    Output

    ```mlir
    module {
      func @pipelinedatatransfer() {
        %c8 = constant 8 : index
        %c0 = constant 0 : index
        %0 = alloc() : memref<256xf32>
        %c0_0 = constant 0 : index
        %c128 = constant 128 : index
        %1 = alloc() : memref<2x32xf32, 1>
        %2 = alloc() : memref<2x1xf32>
        affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
        affine.for %arg0 = 1 to 8 {
          affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
          %8 = affine.apply #map3(%arg0)
          %9 = affine.apply #map4(%8)
          %10 = affine.apply #map4(%8)
          affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
          %11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1>
          %12 = "compute"(%11) : (f32) -> f32
          affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1>
        }
        %3 = affine.apply #map3(%c8)
        %4 = affine.apply #map4(%3)
        %5 = affine.apply #map4(%3)
        affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
        %6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1>
        %7 = "compute"(%6) : (f32) -> f32
        affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1>
        dealloc %2 : memref<2x1xf32>
        dealloc %1 : memref<2x32xf32, 1>
        return
      }
    }
    ```
  }];
  let constructor = "mlir::createPipelineDataTransferPass()";
}

def BufferDeallocation : FunctionPass<"buffer-deallocation"> {
  let summary = "Adds all required dealloc operations for all allocations in the "
                "input program";
  let description = [{
    This pass implements an algorithm to automatically introduce all required
    deallocation operations for all buffers in the input program. This ensures that
    the resulting program does not have any memory leaks.


    Input

    ```mlir
    #map0 = affine_map<(d0) -> (d0)>
    module {
      func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
        cond_br %arg0, ^bb1, ^bb2
      ^bb1:
        br ^bb3(%arg1 : memref<2xf32>)
      ^bb2:
        %0 = alloc() : memref<2xf32>
        linalg.generic {
          args_in = 1 : i64,
          args_out = 1 : i64,
          indexing_maps = [#map0, #map0],
          iterator_types = ["parallel"]} %arg1, %0 {
        ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
          %tmp1 = exp %gen1_arg0 : f32
          linalg.yield %tmp1 : f32
        }: memref<2xf32>, memref<2xf32>
        br ^bb3(%0 : memref<2xf32>)
      ^bb3(%1: memref<2xf32>):
        "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
        return
      }
    }

    ```

    Output

    ```mlir
    #map0 = affine_map<(d0) -> (d0)>
    module {
      func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
        cond_br %arg0, ^bb1, ^bb2
      ^bb1: // pred: ^bb0
        %0 = alloc() : memref<2xf32>
        linalg.copy(%arg1, %0) : memref<2xf32>, memref<2xf32>
        br ^bb3(%0 : memref<2xf32>)
      ^bb2: // pred: ^bb0
        %1 = alloc() : memref<2xf32>
        linalg.generic {
          args_in = 1 : i64,
          args_out = 1 : i64,
          indexing_maps = [#map0, #map0],
          iterator_types = ["parallel"]} %arg1, %1 {
        ^bb0(%arg3: f32, %arg4: f32): // no predecessors
          %4 = exp %arg3 : f32
          linalg.yield %4 : f32
        }: memref<2xf32>, memref<2xf32>
        %2 = alloc() : memref<2xf32>
        linalg.copy(%1, %2) : memref<2xf32>, memref<2xf32>
        dealloc %1 : memref<2xf32>
        br ^bb3(%2 : memref<2xf32>)
      ^bb3(%3: memref<2xf32>): // 2 preds: ^bb1, ^bb2
        linalg.copy(%3, %arg2) : memref<2xf32>, memref<2xf32>
        dealloc %3 : memref<2xf32>
        return
      }

    }
    ```

  }];
  let constructor = "mlir::createBufferDeallocationPass()";
}

def BufferHoisting : FunctionPass<"buffer-hoisting"> {
  let summary = "Optimizes placement of allocation operations by moving them "
                "into common dominators and out of nested regions";
  let description = [{
    This pass implements an approach to aggressively move allocations upwards
    into common dominators and out of nested regions.
  }];
  let constructor = "mlir::createBufferHoistingPass()";
}

def BufferLoopHoisting : FunctionPass<"buffer-loop-hoisting"> {
  let summary = "Optimizes placement of allocation operations by moving them "
                "out of loop nests";
  let description = [{
    This pass implements an approach to aggressively move allocations upwards
    out of loop nests. It does not move allocations into common dominators.
  }];
  let constructor = "mlir::createBufferLoopHoistingPass()";
}

def PromoteBuffersToStack : FunctionPass<"promote-buffers-to-stack"> {
  let summary = "Promotes heap-based allocations to automatically managed "
                "stack-based allocations";
  let description = [{
    This pass implements a simple algorithm to convert heap-based memory
    allocations to stack-based ones. It uses a built-in heuristic to decide
    whether it makes sense to convert an allocation. Furthermore, dynamic
    shaped buffers that are limited by the rank of the tensor can be
    converted. They are only transformed if they are considered to be small.
  }];
  let constructor = "mlir::createPromoteBuffersToStackPass()";
  let options = [
    Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
           /*default=*/"1024",
           "Maximal size in bytes to promote allocations to stack.">,
    Option<"bitwidthOfIndexType", "bitwidth-of-index-type", "unsigned",
           /*default=*/"64",
           "Bitwidth of the index type. Used for size estimation.">,
    Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
           /*default=*/"1",
           "Maximal memref rank to promote dynamic buffers.">,
  ];
}

def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp"> {
  let summary = "Converts memref-typed function results to out-params";
  let description = [{
    Some calling conventions prefer to pass output memrefs as "out params". The
    conversion to this calling convention must be done as an atomic
    transformation of the entire program (hence this is a module pass).

    For example, if a call is rewritten, the callee needs to be rewritten
    otherwise the IR will end up invalid. Thus, this transformation
    requires an atomic change to the entire program (e.g. the whole module).

    This pass is expected to run immediately after bufferization is finished.
    At that point, tensor-typed results will have been converted to memref-typed
    results, and can be consistently converted to out params.

    All memref-typed results are appended to the function argument list.

    The main issue with this pass (and the out-param calling convention) is that
    buffers for results need to be allocated in the caller. This currently only
    works for static shaped memrefs.
  }];
  let constructor = "mlir::createBufferResultsToOutParamsPass()";
  let dependentDialects = ["linalg::LinalgDialect", "memref::MemRefDialect"];
}

def Canonicalizer : Pass<"canonicalize"> {
  let summary = "Canonicalize operations";
  let description = [{
    This pass performs various types of canonicalizations over a set of
    operations. See [Operation Canonicalization](Canonicalization.md) for more
    details.
  }];
  let constructor = "mlir::createCanonicalizerPass()";
  let options = [
    Option<"topDownProcessingEnabled", "top-down", "bool",
           /*default=*/"true",
           "Seed the worklist in general top-down order">,
    Option<"enableRegionSimplification", "region-simplify", "bool",
           /*default=*/"true",
           "Perform control flow optimizations to the region tree">,
    Option<"maxIterations", "max-iterations", "unsigned",
           /*default=*/"10",
           "Max. iterations between applying patterns / simplifying regions">
  ] # RewritePassUtils.options;
}

def CSE : Pass<"cse"> {
  let summary = "Eliminate common sub-expressions";
  let description = [{
    This pass implements a generalized algorithm for common sub-expression
    elimination. This pass relies on information provided by the
    `Memory SideEffect` interface to identify when it is safe to eliminate
    operations. See [Common subexpression elimination](https://en.wikipedia.org/wiki/Common_subexpression_elimination)
    for more general details on this optimization.
  }];
  let constructor = "mlir::createCSEPass()";
  let statistics = [
    Statistic<"numCSE", "num-cse'd", "Number of operations CSE'd">,
    Statistic<"numDCE", "num-dce'd", "Number of operations DCE'd">
  ];
}

def Inliner : Pass<"inline"> {
  let summary = "Inline function calls";
  let constructor = "mlir::createInlinerPass()";
  let options = [
    Option<"defaultPipelineStr", "default-pipeline", "std::string",
           /*default=*/"", "The default optimizer pipeline used for callables">,
    ListOption<"opPipelineStrs", "op-pipelines", "std::string",
               "Callable operation specific optimizer pipelines (in the form "
               "of `dialect.op(pipeline)`)",
               "llvm::cl::MiscFlags::CommaSeparated">,
    Option<"maxInliningIterations", "max-iterations", "unsigned",
           /*default=*/"4",
           "Maximum number of iterations when inlining within an SCC">,
  ];
}

def FinalizingBufferize : FunctionPass<"finalizing-bufferize"> {
  let summary = "Finalize a partial bufferization";
  let description = [{
    A bufferize pass that finalizes a partial bufferization by removing
    remaining `memref.tensor_load` and `memref.buffer_cast` operations.

    The removal of those operations is only possible if the operations only
    exist in pairs, i.e., all uses of `memref.tensor_load` operations are
    `memref.buffer_cast` operations.

    This pass will fail if not all operations can be removed or if any operation
    with tensor typed operands remains.
  }];
  let constructor = "mlir::createFinalizingBufferizePass()";
}

def LocationSnapshot : Pass<"snapshot-op-locations"> {
  let summary = "Generate new locations from the current IR";
  let description = [{
    This pass allows for generating new locations from the IR during any stage
    of compilation, by snapshotting the IR to a file and using that file to
    generate new locations for the operations.

    Depending on the value of the `tag` option, different resulting locations
    may be generated:

    * If unset, the original location of the operation is replaced.

    Example:

    ```mlir
    // old:
    ... loc("original_source.cpp":1:1)

    // new:
    ... loc("snapshot_source.mlir":10:10)
    ```

    * If set, the new location is fused with the original location in the form
    of a [`Name Location`](Diagnostics.md#name-location) with the specified tag.

    Example:

    ```mlir
    // old:
    ... loc("original_source.cpp":1:1)

    // new:
    ... loc(fused["original_source.cpp":1:1, "snapshot"("snapshot_source.mlir":10:10)])
    ```
  }];
  let constructor = "mlir::createLocationSnapshotPass()";
  let options = [
    Option<"fileName", "filename", "std::string", /*default=*/"",
           "The filename to print the generated IR">,
    Option<"tag", "tag", "std::string", /*default=*/"",
           "A tag to use when fusing the new locations with the "
           "original. If unset, the locations are replaced.">,
  ];
}

def LoopCoalescing : FunctionPass<"loop-coalescing"> {
  let summary = "Coalesce nested loops with independent bounds into a single "
                "loop";
  let constructor = "mlir::createLoopCoalescingPass()";
}

def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
  let summary = "Hoist loop invariant instructions outside of the loop";
  let constructor = "mlir::createLoopInvariantCodeMotionPass()";
}

def NormalizeMemRefs : Pass<"normalize-memrefs", "ModuleOp"> {
  let summary = "Normalize memrefs";
  let description = [{
    This pass transforms memref types with a non-trivial
    [layout map](https://mlir.llvm.org/docs/LangRef/#layout-map) into
    memref types with an identity layout map, e.g. (i, j) -> (i, j). This
    pass is inter-procedural, in the sense that it can modify function
    interfaces and call sites that pass memref types. In order to modify
    memref types while preserving the original behavior, users of those
    memref types are also modified to incorporate the resulting layout map.
    For instance, an [AffineLoadOp]
    (https://mlir.llvm.org/docs/Dialects/Affine/#affineload-affineloadop)
    will be updated to compose the layout map with the affine expression
    contained in the op. Operations marked with the [MemRefsNormalizable]
    (https://mlir.llvm.org/docs/Traits/#memrefsnormalizable) trait are
    expected to be normalizable. Supported operations include affine
    operations, memref.alloc, memref.dealloc, and std.return.

    Given an appropriate layout map specified in the code, this transformation
    can express tiled or linearized access to multi-dimensional data
    structures, but will not modify memref types without an explicit layout
    map.

    Currently this pass is limited to only modify
    functions where all memref types can be normalized. If a function
    contains any operations that are not MemRefNormalizable, then the function
    and any functions that call it or are called by it will not be modified.

    Input

    ```mlir
    #tile = affine_map<(i) -> (i floordiv 4, i mod 4)>
    func @matmul(%A: memref<16xf64, #tile>,
                 %B: index, %C: memref<16xf64>) -> (memref<16xf64, #tile>) {
      affine.for %arg3 = 0 to 16 {
            %a = affine.load %A[%arg3] : memref<16xf64, #tile>
            %p = mulf %a, %a : f64
            affine.store %p, %A[%arg3] : memref<16xf64, #tile>
      }
      %c = alloc() : memref<16xf64, #tile>
      %d = affine.load %c[0] : memref<16xf64, #tile>
      return %A: memref<16xf64, #tile>
    }
    ```

    Output

    ```mlir
    func @matmul(%arg0: memref<4x4xf64>, %arg1: index, %arg2: memref<16xf64>)
      -> memref<4x4xf64> {
      affine.for %arg3 = 0 to 16 {
        %3 = affine.load %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
        %4 = mulf %3, %3 : f64
        affine.store %4, %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
      }
      %0 = alloc() : memref<4x4xf64>
      %1 = affine.apply #map1()
      %2 = affine.load %0[0, 0] : memref<4x4xf64>
      return %arg0 : memref<4x4xf64>
    }
    ```

    Input

    ```
    #linear8 = affine_map<(i, j) -> (i * 8 + j)>
    func @linearize(%arg0: memref<8x8xi32, #linear8>,
                    %arg1: memref<8x8xi32, #linear8>,
                    %arg2: memref<8x8xi32, #linear8>) {
      %c8 = constant 8 : index
      %c0 = constant 0 : index
      %c1 = constant 1 : index
      affine.for %arg3 = %c0 to %c8 {
        affine.for %arg4 = %c0 to %c8 {
          affine.for %arg5 = %c0 to %c8 {
            %0 = affine.load %arg0[%arg3, %arg5] : memref<8x8xi32, #linear8>
            %1 = affine.load %arg1[%arg5, %arg4] : memref<8x8xi32, #linear8>
            %2 = affine.load %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
            %3 = muli %0, %1 : i32
            %4 = addi %2, %3 : i32
            affine.store %4, %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
          }
        }
      }
      return
    }
    ```

    Output

    ```mlir
    func @linearize(%arg0: memref<64xi32>,
                    %arg1: memref<64xi32>,
                    %arg2: memref<64xi32>) {
      %c8 = constant 8 : index
      %c0 = constant 0 : index
      affine.for %arg3 = %c0 to %c8 {
        affine.for %arg4 = %c0 to %c8 {
          affine.for %arg5 = %c0 to %c8 {
            %0 = affine.load %arg0[%arg3 * 8 + %arg5] : memref<64xi32>
            %1 = affine.load %arg1[%arg5 * 8 + %arg4] : memref<64xi32>
            %2 = affine.load %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
            %3 = muli %0, %1 : i32
            %4 = addi %2, %3 : i32
            affine.store %4, %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
          }
        }
      }
      return
    }
    ```
  }];
  let constructor = "mlir::createNormalizeMemRefsPass()";
  let dependentDialects = ["AffineDialect"];
}

def ParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
  let summary = "Collapse parallel loops to use less induction variables";
  let constructor = "mlir::createParallelLoopCollapsingPass()";
  let options = [
    ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
               "Which loop indices to combine into the position 0 loop index",
               "llvm::cl::MiscFlags::CommaSeparated">,
    ListOption<"clCollapsedIndices1", "collapsed-indices-1", "unsigned",
               "Which loop indices to combine into the position 1 loop index",
               "llvm::cl::MiscFlags::CommaSeparated">,
    ListOption<"clCollapsedIndices2", "collapsed-indices-2", "unsigned",
               "Which loop indices to combine into the position 2 loop index",
               "llvm::cl::MiscFlags::CommaSeparated">,
  ];
}

def PrintCFG : FunctionPass<"print-cfg-graph"> {
  let summary = "Print CFG graph per-Region";
  let constructor = "mlir::createPrintCFGGraphPass()";
}

def PrintOpStats : Pass<"print-op-stats"> {
  let summary = "Print statistics of operations";
  let constructor = "mlir::createPrintOpStatsPass()";
}

def SCCP : Pass<"sccp"> {
  let summary = "Sparse Conditional Constant Propagation";
  let description = [{
    This pass implements a general algorithm for sparse conditional constant
    propagation. This algorithm detects values that are known to be constant and
    optimistically propagates this throughout the IR. Any values proven to be
    constant are replaced, and removed if possible.

    This implementation is based on the algorithm described by Wegman and Zadeck
    in [“Constant Propagation with Conditional Branches”](https://dl.acm.org/doi/10.1145/103135.103136) (1991).
  }];
  let constructor = "mlir::createSCCPPass()";
}

def StripDebugInfo : Pass<"strip-debuginfo"> {
  let summary = "Strip debug info from all operations";
  let description = [{
    This pass strips the IR of any location information, by replacing all
    operation locations with [`unknown`](Diagnostics.md#unknown-location).
  }];
  let constructor = "mlir::createStripDebugInfoPass()";
}

def SymbolDCE : Pass<"symbol-dce"> {
  let summary = "Eliminate dead symbols";
  let description = [{
    This pass deletes all symbols that are found to be unreachable. This is done
    by computing the set of operations that are known to be live, propagating
    that liveness to other symbols, and then deleting all symbols that are not
    within this live set. Live symbols are those that have a
    [visibility](SymbolsAndSymbolTables.md#symbol-visibility) that extends
    beyond the IR, e.g. `public`, or those that are referenced by live symbols
    or other non-Symbol operations.

    For example, consider the following input:

    ```mlir
    func private @dead_private_function()
    func private @live_private_function()

    // Note: The `public` isn't necessary here, as this is the default.
    func public @public_function() {
      "foo.return"() {uses = [@live_private_function]} : () -> ()
    }
    ```

    A known live function, `public_function`, contains a reference to an
    otherwise non-live function `live_private_function`. After running
    `symbol-dce`, only these two symbols should remain, as the final symbol
    `dead_private_function` is not visible outside of the current IR and there
    are no links to known-live operations. After running, we get the expected:

    ```mlir
    func private @live_private_function()

    func public @public_function() {
      "foo.return"() {uses = [@live_private_function]} : () -> ()
    }
    ```

    See [Symbols and SymbolTables](SymbolsAndSymbolTables.md) for more
    information on `Symbols`.
  }];
  let constructor = "mlir::createSymbolDCEPass()";
}

def ViewOpGraphPass : Pass<"view-op-graph", "ModuleOp"> {
  let summary = "Print graphviz view of module";
  let description = [{
    This pass prints a graphviz per block of a module.

    - Ops are represented as nodes;
    - Uses as edges;
  }];
  let constructor = "mlir::createPrintOpGraphPass()";
  let options = [
    Option<"title", "title", "std::string",
           /*default=*/"", "The prefix of the title of the graph">,
    Option<"shortNames", "short-names", "bool", /*default=*/"false",
           "Use short names">
  ];
}

#endif // MLIR_TRANSFORMS_PASSES