//===-- Passes.td - Transforms pass definition file --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains definitions for passes within the Transforms/ directory.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_TRANSFORMS_PASSES
#define MLIR_TRANSFORMS_PASSES

include "mlir/Pass/PassBase.td"
include "mlir/Rewrite/PassUtil.td"

def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> {
  let summary = "Fuse affine loop nests";
  let description = [{
    This pass performs fusion of loop nests using a slicing-based approach. It
    combines two fusion strategies: producer-consumer fusion and sibling fusion.
    Producer-consumer fusion is aimed at fusing pairs of loops where the first
    one writes to a memref that the second reads. Sibling fusion targets pairs
    of loops that share no dependences between them but that load from the same
    memref. The fused loop nests, when possible, are rewritten to access
    significantly smaller local buffers instead of the original memrefs, and
    the latter are often either completely optimized away or contracted. This
    transformation leads to enhanced locality and a lower memory footprint
    through the elimination or contraction of temporary/intermediate memrefs.
    These benefits are sometimes achieved at the expense of redundant
    computation, which is kept in check by a cost model that evaluates
    available choices, such as the depth at which a source slice should be
    materialized in the destination slice.

    Example 1: Producer-consumer fusion.
    Input:
    ```mlir
    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
      %0 = alloc() : memref<10xf32>
      %1 = alloc() : memref<10xf32>
      %cst = constant 0.000000e+00 : f32
      affine.for %arg2 = 0 to 10 {
        affine.store %cst, %0[%arg2] : memref<10xf32>
        affine.store %cst, %1[%arg2] : memref<10xf32>
      }
      affine.for %arg2 = 0 to 10 {
        %2 = affine.load %0[%arg2] : memref<10xf32>
        %3 = addf %2, %2 : f32
        affine.store %3, %arg0[%arg2] : memref<10xf32>
      }
      affine.for %arg2 = 0 to 10 {
        %2 = affine.load %1[%arg2] : memref<10xf32>
        %3 = mulf %2, %2 : f32
        affine.store %3, %arg1[%arg2] : memref<10xf32>
      }
      return
    }
    ```
    Output:
    ```mlir
    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
      %0 = alloc() : memref<1xf32>
      %1 = alloc() : memref<1xf32>
      %cst = constant 0.000000e+00 : f32
      affine.for %arg2 = 0 to 10 {
        affine.store %cst, %0[0] : memref<1xf32>
        affine.store %cst, %1[0] : memref<1xf32>
        %2 = affine.load %1[0] : memref<1xf32>
        %3 = mulf %2, %2 : f32
        affine.store %3, %arg1[%arg2] : memref<10xf32>
        %4 = affine.load %0[0] : memref<1xf32>
        %5 = addf %4, %4 : f32
        affine.store %5, %arg0[%arg2] : memref<10xf32>
      }
      return
    }
    ```

    Example 2: Sibling fusion.
    Input:
    ```mlir
    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
                         %arg4: memref<10x10xf32>) {
      affine.for %arg5 = 0 to 3 {
        affine.for %arg6 = 0 to 3 {
          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
          %2 = mulf %0, %1 : f32
          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
        }
      }
      affine.for %arg5 = 0 to 3 {
        affine.for %arg6 = 0 to 3 {
          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
          %1 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
          %2 = addf %0, %1 : f32
          affine.store %2, %arg4[%arg5, %arg6] : memref<10x10xf32>
        }
      }
      return
    }
    ```
    Output:
    ```mlir
    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
                         %arg4: memref<10x10xf32>) {
      affine.for %arg5 = 0 to 3 {
        affine.for %arg6 = 0 to 3 {
          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
          %2 = mulf %0, %1 : f32
          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
          %3 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
          %4 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
          %5 = addf %3, %4 : f32
          affine.store %5, %arg4[%arg5, %arg6] : memref<10x10xf32>
        }
      }
      return
    }
    ```
  }];
  let constructor = "mlir::createLoopFusionPass()";
  let options = [
    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
           /*default=*/"0.30f", "Fractional increase in additional computation "
                                "tolerated while fusing">,
    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
           /*default=*/"0",
           "Faster memory space number to promote fusion buffers to">,
    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
                            "to fast memory space">,
    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
           "Enables maximal loop fusion">,
  ];
  let dependentDialects = ["memref::MemRefDialect"];
}

def AffinePipelineDataTransfer
    : FunctionPass<"affine-pipeline-data-transfer"> {
  let summary = "Pipeline non-blocking data transfers between explicitly "
                "managed levels of the memory hierarchy";
  let description = [{
    This pass performs a transformation to overlap non-blocking DMA operations
    in a loop with computations through double buffering. This is achieved by
    advancing dma_start operations with respect to other operations.

    Input

    ```mlir
    func @pipelinedatatransfer() {
      %0 = alloc() : memref<256xf32>
      %1 = alloc() : memref<32xf32, 1>
      %2 = alloc() : memref<1xf32>
      %c0 = constant 0 : index
      %c128 = constant 128 : index
      affine.for %i0 = 0 to 8 {
        affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
        affine.dma_wait %2[%c0], %c128 : memref<1xf32>
        %3 = affine.load %1[%i0] : memref<32xf32, 1>
        %4 = "compute"(%3) : (f32) -> f32
        affine.store %4, %1[%i0] : memref<32xf32, 1>
      }
      return
    }
    ```

    Output

    ```mlir
    module {
      func @pipelinedatatransfer() {
        %c8 = constant 8 : index
        %c0 = constant 0 : index
        %0 = alloc() : memref<256xf32>
        %c0_0 = constant 0 : index
        %c128 = constant 128 : index
        %1 = alloc() : memref<2x32xf32, 1>
        %2 = alloc() : memref<2x1xf32>
        affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
        affine.for %arg0 = 1 to 8 {
          affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
          %8 = affine.apply #map3(%arg0)
          %9 = affine.apply #map4(%8)
          %10 = affine.apply #map4(%8)
          affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
          %11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1>
          %12 = "compute"(%11) : (f32) -> f32
          affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1>
        }
        %3 = affine.apply #map3(%c8)
        %4 = affine.apply #map4(%3)
        %5 = affine.apply #map4(%3)
        affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
        %6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1>
        %7 = "compute"(%6) : (f32) -> f32
        affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1>
        dealloc %2 : memref<2x1xf32>
        dealloc %1 : memref<2x32xf32, 1>
        return
      }
    }
    ```
  }];
  let constructor = "mlir::createPipelineDataTransferPass()";
}

def BufferDeallocation : FunctionPass<"buffer-deallocation"> {
  let summary = "Adds all required dealloc operations for all allocations in the "
                "input program";
  let description = [{
    This pass implements an algorithm to automatically introduce all required
    deallocation operations for all buffers in the input program. This ensures
    that the resulting program does not have any memory leaks.

    Input

    ```mlir
    #map0 = affine_map<(d0) -> (d0)>
    module {
      func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
        cond_br %arg0, ^bb1, ^bb2
      ^bb1:
        br ^bb3(%arg1 : memref<2xf32>)
      ^bb2:
        %0 = alloc() : memref<2xf32>
        linalg.generic {
          args_in = 1 : i64,
          args_out = 1 : i64,
          indexing_maps = [#map0, #map0],
          iterator_types = ["parallel"]} %arg1, %0 {
        ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
          %tmp1 = exp %gen1_arg0 : f32
          linalg.yield %tmp1 : f32
        }: memref<2xf32>, memref<2xf32>
        br ^bb3(%0 : memref<2xf32>)
      ^bb3(%1: memref<2xf32>):
        "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
        return
      }
    }
    ```

    Output

    ```mlir
    #map0 = affine_map<(d0) -> (d0)>
    module {
      func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
        cond_br %arg0, ^bb1, ^bb2
      ^bb1:  // pred: ^bb0
        %0 = alloc() : memref<2xf32>
        linalg.copy(%arg1, %0) : memref<2xf32>, memref<2xf32>
        br ^bb3(%0 : memref<2xf32>)
      ^bb2:  // pred: ^bb0
        %1 = alloc() : memref<2xf32>
        linalg.generic {
          args_in = 1 : i64,
          args_out = 1 : i64,
          indexing_maps = [#map0, #map0],
          iterator_types = ["parallel"]} %arg1, %1 {
        ^bb0(%arg3: f32, %arg4: f32):  // no predecessors
          %4 = exp %arg3 : f32
          linalg.yield %4 : f32
        }: memref<2xf32>, memref<2xf32>
        %2 = alloc() : memref<2xf32>
        linalg.copy(%1, %2) : memref<2xf32>, memref<2xf32>
        dealloc %1 : memref<2xf32>
        br ^bb3(%2 : memref<2xf32>)
      ^bb3(%3: memref<2xf32>):  // 2 preds: ^bb1, ^bb2
        linalg.copy(%3, %arg2) : memref<2xf32>, memref<2xf32>
        dealloc %3 : memref<2xf32>
        return
      }
    }
    ```
  }];
  let constructor = "mlir::createBufferDeallocationPass()";
}

def BufferHoisting : FunctionPass<"buffer-hoisting"> {
  let summary = "Optimizes placement of allocation operations by moving them "
                "into common dominators and out of nested regions";
  let description = [{
    This pass implements an approach to aggressively move allocations upwards
    into common dominators and out of nested regions.
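
    For example, in the following sketch (hypothetical input; the exact
    placement is determined by the pass's dominance analysis), the allocation
    is moved out of the conditional region into the dominating block:

    ```mlir
    // Input: the allocation is buried inside a nested region.
    func @hoist(%cond: i1, %val: f32) {
      scf.if %cond {
        %0 = memref.alloc() : memref<2xf32>
        affine.store %val, %0[0] : memref<2xf32>
      }
      return
    }

    // Possible output after buffer-hoisting:
    func @hoist(%cond: i1, %val: f32) {
      %0 = memref.alloc() : memref<2xf32>
      scf.if %cond {
        affine.store %val, %0[0] : memref<2xf32>
      }
      return
    }
    ```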
  }];
  let constructor = "mlir::createBufferHoistingPass()";
}

def BufferLoopHoisting : FunctionPass<"buffer-loop-hoisting"> {
  let summary = "Optimizes placement of allocation operations by moving them "
                "out of loop nests";
  let description = [{
    This pass implements an approach to aggressively move allocations upwards
    out of loop nests. It does not move allocations into common dominators.
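
    For example, in the following sketch (hypothetical input), the allocation
    inside the loop is hoisted so that a single buffer is reused across
    iterations:

    ```mlir
    // Input: a fresh buffer is allocated on every iteration.
    func @hoist_loop(%val: f32) {
      affine.for %i = 0 to 8 {
        %0 = memref.alloc() : memref<2xf32>
        affine.store %val, %0[0] : memref<2xf32>
      }
      return
    }

    // Possible output after buffer-loop-hoisting:
    func @hoist_loop(%val: f32) {
      %0 = memref.alloc() : memref<2xf32>
      affine.for %i = 0 to 8 {
        affine.store %val, %0[0] : memref<2xf32>
      }
      return
    }
    ```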
  }];
  let constructor = "mlir::createBufferLoopHoistingPass()";
}

def PromoteBuffersToStack : FunctionPass<"promote-buffers-to-stack"> {
  let summary = "Promotes heap-based allocations to automatically managed "
                "stack-based allocations";
  let description = [{
    This pass implements a simple algorithm to convert heap-based memory
    allocations to stack-based ones. It uses a built-in heuristic to decide
    whether it makes sense to convert an allocation. Furthermore, dynamically
    shaped buffers can be converted as long as their rank does not exceed a
    configurable limit, and only if they are considered to be small.
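
    For example (a schematic sketch; whether a given allocation is promoted
    depends on the size heuristic and the pass options below):

    ```mlir
    // Input: a small, statically shaped heap allocation.
    func @promote(%val: f32) -> f32 {
      %0 = memref.alloc() : memref<2xf32>
      affine.store %val, %0[0] : memref<2xf32>
      %1 = affine.load %0[0] : memref<2xf32>
      memref.dealloc %0 : memref<2xf32>
      return %1 : f32
    }

    // Possible output: the buffer becomes a stack allocation and the
    // corresponding dealloc disappears.
    func @promote(%val: f32) -> f32 {
      %0 = memref.alloca() : memref<2xf32>
      affine.store %val, %0[0] : memref<2xf32>
      %1 = affine.load %0[0] : memref<2xf32>
      return %1 : f32
    }
    ```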
  }];
  let constructor = "mlir::createPromoteBuffersToStackPass()";
  let options = [
    Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
           /*default=*/"1024",
           "Maximal size in bytes to promote allocations to stack.">,
    Option<"bitwidthOfIndexType", "bitwidth-of-index-type", "unsigned",
           /*default=*/"64",
           "Bitwidth of the index type. Used for size estimation.">,
    Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
           /*default=*/"1",
           "Maximal memref rank to promote dynamic buffers.">,
  ];
}

def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp"> {
  let summary = "Converts memref-typed function results to out-params";
  let description = [{
    Some calling conventions prefer to pass output memrefs as "out params". The
    conversion to this calling convention must be done as an atomic
    transformation of the entire program (hence this is a module pass).

    For example, if a call is rewritten, the callee needs to be rewritten as
    well; otherwise the IR will end up invalid. Thus, this transformation
    requires an atomic change to the entire program (i.e., the whole module).

    This pass is expected to run immediately after bufferization is finished.
    At that point, tensor-typed results will have been converted to memref-typed
    results, and can be consistently converted to out params.

    All memref-typed results are appended to the function argument list.

    The main issue with this pass (and the out-param calling convention) is that
    buffers for results need to be allocated in the caller. This currently only
    works for static shaped memrefs.
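
    For example (a schematic sketch of the rewrite; the copy is emitted with
    the linalg dialect, which is why it is a dependent dialect of this pass):

    ```mlir
    // Input: a function returning a memref result.
    func @callee() -> memref<2xf32> {
      %0 = memref.alloc() : memref<2xf32>
      return %0 : memref<2xf32>
    }

    // Possible output: the result becomes a trailing out-parameter that the
    // callee copies into.
    func @callee(%out: memref<2xf32>) {
      %0 = memref.alloc() : memref<2xf32>
      linalg.copy(%0, %out) : memref<2xf32>, memref<2xf32>
      return
    }
    ```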
  }];
  let constructor = "mlir::createBufferResultsToOutParamsPass()";
  let dependentDialects = ["linalg::LinalgDialect", "memref::MemRefDialect"];
}

def Canonicalizer : Pass<"canonicalize"> {
  let summary = "Canonicalize operations";
  let description = [{
    This pass performs various types of canonicalizations over a set of
    operations. See [Operation Canonicalization](Canonicalization.md) for more
    details.
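
    For example (a minimal sketch; the exact set of applied canonicalizations
    depends on the registered patterns and op folders):

    ```mlir
    // Input: an addition with the identity element.
    func @fold_add(%arg0: i32) -> i32 {
      %c0 = constant 0 : i32
      %0 = addi %arg0, %c0 : i32
      return %0 : i32
    }

    // Possible output: the addition folds away and the dead constant is
    // removed.
    func @fold_add(%arg0: i32) -> i32 {
      return %arg0 : i32
    }
    ```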
  }];
  let constructor = "mlir::createCanonicalizerPass()";
  let options = [
    Option<"topDownProcessingEnabled", "top-down", "bool",
           /*default=*/"true",
           "Seed the worklist in general top-down order">,
    Option<"enableRegionSimplification", "region-simplify", "bool",
           /*default=*/"true",
           "Perform control flow optimizations to the region tree">,
    Option<"maxIterations", "max-iterations", "unsigned",
           /*default=*/"10",
           "Max. iterations between applying patterns / simplifying regions">
  ] # RewritePassUtils.options;
}

def CSE : Pass<"cse"> {
  let summary = "Eliminate common sub-expressions";
  let description = [{
    This pass implements a generalized algorithm for common sub-expression
    elimination. This pass relies on information provided by the
    `MemoryEffectOpInterface` to identify when it is safe to eliminate
    operations. See [Common subexpression elimination](https://en.wikipedia.org/wiki/Common_subexpression_elimination)
    for more general details on this optimization.
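
    For example (a minimal sketch):

    ```mlir
    // Input: the two addi operations compute the same value.
    func @cse_example(%a: i32, %b: i32) -> (i32, i32) {
      %0 = addi %a, %b : i32
      %1 = addi %a, %b : i32
      return %0, %1 : i32, i32
    }

    // Possible output: the duplicate is eliminated and its uses are
    // redirected to the remaining operation.
    func @cse_example(%a: i32, %b: i32) -> (i32, i32) {
      %0 = addi %a, %b : i32
      return %0, %0 : i32, i32
    }
    ```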
  }];
  let constructor = "mlir::createCSEPass()";
  let statistics = [
    Statistic<"numCSE", "num-cse'd", "Number of operations CSE'd">,
    Statistic<"numDCE", "num-dce'd", "Number of operations DCE'd">
  ];
}

def Inliner : Pass<"inline"> {
  let summary = "Inline function calls";
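  let description = [{
    This pass performs inlining of function calls. For example (a minimal
    sketch with hypothetical function names; whether a particular call site is
    inlined depends on the inliner's profitability heuristics):

    ```mlir
    // Input:
    func @callee(%arg0: i32) -> i32 {
      %0 = addi %arg0, %arg0 : i32
      return %0 : i32
    }
    func @caller(%arg0: i32) -> i32 {
      %0 = call @callee(%arg0) : (i32) -> i32
      return %0 : i32
    }

    // Possible output: the call is replaced by the callee's body.
    func @callee(%arg0: i32) -> i32 {
      %0 = addi %arg0, %arg0 : i32
      return %0 : i32
    }
    func @caller(%arg0: i32) -> i32 {
      %0 = addi %arg0, %arg0 : i32
      return %0 : i32
    }
    ```
  }];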
  let constructor = "mlir::createInlinerPass()";
  let options = [
    Option<"defaultPipelineStr", "default-pipeline", "std::string",
           /*default=*/"", "The default optimizer pipeline used for callables">,
    ListOption<"opPipelineStrs", "op-pipelines", "std::string",
               "Callable operation specific optimizer pipelines (in the form "
               "of `dialect.op(pipeline)`)",
               "llvm::cl::MiscFlags::CommaSeparated">,
    Option<"maxInliningIterations", "max-iterations", "unsigned",
           /*default=*/"4",
           "Maximum number of iterations when inlining within an SCC">,
  ];
}

def FinalizingBufferize : FunctionPass<"finalizing-bufferize"> {
  let summary = "Finalize a partial bufferization";
  let description = [{
    A bufferize pass that finalizes a partial bufferization by removing
    remaining `memref.tensor_load` and `memref.buffer_cast` operations.

    The removal of those operations is possible only if they exist in pairs,
    i.e., all uses of `memref.tensor_load` operations are `memref.buffer_cast`
    operations.

    This pass will fail if not all operations can be removed or if any operation
    with tensor typed operands remains.
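
    For example (a minimal sketch):

    ```mlir
    // Input: a materialization pair left over from partial bufferization.
    func @finalize(%arg0: memref<4xf32>) -> memref<4xf32> {
      %0 = memref.tensor_load %arg0 : memref<4xf32>
      %1 = memref.buffer_cast %0 : memref<4xf32>
      return %1 : memref<4xf32>
    }

    // Output: the tensor_load/buffer_cast pair folds away.
    func @finalize(%arg0: memref<4xf32>) -> memref<4xf32> {
      return %arg0 : memref<4xf32>
    }
    ```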
  }];
  let constructor = "mlir::createFinalizingBufferizePass()";
}

def LocationSnapshot : Pass<"snapshot-op-locations"> {
  let summary = "Generate new locations from the current IR";
  let description = [{
    This pass allows for generating new locations from the IR during any stage
    of compilation, by snapshotting the IR to a file and using that file to
    generate new locations for the operations.

    Depending on the value of the `tag` option, different resulting locations
    may be generated:

    * If unset, the original location of the operation is replaced.

    Example:

    ```mlir
    // old:
    ... loc("original_source.cpp":1:1)

    // new:
    ... loc("snapshot_source.mlir":10:10)
    ```

    * If set, the new location is fused with the original location in the form
    of a [`Name Location`](Diagnostics.md#name-location) with the specified tag.

    Example:

    ```mlir
    // old:
    ... loc("original_source.cpp":1:1)

    // new:
    ... loc(fused["original_source.cpp":1:1, "snapshot"("snapshot_source.mlir":10:10)])
    ```
  }];
  let constructor = "mlir::createLocationSnapshotPass()";
  let options = [
    Option<"fileName", "filename", "std::string", /*default=*/"",
           "The filename to print the generated IR to">,
    Option<"tag", "tag", "std::string", /*default=*/"",
           "A tag to use when fusing the new locations with the "
           "original. If unset, the locations are replaced.">,
  ];
}

def LoopCoalescing : FunctionPass<"loop-coalescing"> {
  let summary = "Coalesce nested loops with independent bounds into a single "
                "loop";
  let constructor = "mlir::createLoopCoalescingPass()";
}

def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
  let summary = "Hoist loop invariant instructions outside of the loop";
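  let description = [{
    This pass moves operations that are side-effect free and whose operands
    are defined outside of the loop out of the loop body. For example (a
    minimal sketch):

    ```mlir
    // Input: the addition does not depend on the induction variable.
    func @licm(%a: i32, %b: i32, %m: memref<8xi32>) {
      affine.for %i = 0 to 8 {
        %0 = addi %a, %b : i32
        affine.store %0, %m[%i] : memref<8xi32>
      }
      return
    }

    // Possible output: the loop-invariant addition is hoisted.
    func @licm(%a: i32, %b: i32, %m: memref<8xi32>) {
      %0 = addi %a, %b : i32
      affine.for %i = 0 to 8 {
        affine.store %0, %m[%i] : memref<8xi32>
      }
      return
    }
    ```
  }];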
  let constructor = "mlir::createLoopInvariantCodeMotionPass()";
}

def NormalizeMemRefs : Pass<"normalize-memrefs", "ModuleOp"> {
  let summary = "Normalize memrefs";
  let description = [{
    This pass transforms memref types with a non-trivial
    [layout map](https://mlir.llvm.org/docs/LangRef/#layout-map) into
    memref types with an identity layout map, e.g. (i, j) -> (i, j). This
    pass is inter-procedural, in the sense that it can modify function
    interfaces and call sites that pass memref types. In order to modify
    memref types while preserving the original behavior, users of those
    memref types are also modified to incorporate the resulting layout map.
    For instance, an
    [AffineLoadOp](https://mlir.llvm.org/docs/Dialects/Affine/#affineload-affineloadop)
    will be updated to compose the layout map with the affine expression
    contained in the op. Operations marked with the
    [MemRefsNormalizable](https://mlir.llvm.org/docs/Traits/#memrefsnormalizable)
    trait are expected to be normalizable. Supported operations include affine
    operations, memref.alloc, memref.dealloc, and std.return.

    Given an appropriate layout map specified in the code, this transformation
    can express tiled or linearized access to multi-dimensional data
    structures, but will not modify memref types without an explicit layout
    map.

    Currently, this pass modifies only functions where all memref types can be
    normalized. If a function contains any operations that are not
    MemRefsNormalizable, then the function, and any functions that call it or
    that it calls, will not be modified.

    Input

    ```mlir
    #tile = affine_map<(i) -> (i floordiv 4, i mod 4)>
    func @matmul(%A: memref<16xf64, #tile>,
                 %B: index, %C: memref<16xf64>) -> (memref<16xf64, #tile>) {
      affine.for %arg3 = 0 to 16 {
        %a = affine.load %A[%arg3] : memref<16xf64, #tile>
        %p = mulf %a, %a : f64
        affine.store %p, %A[%arg3] : memref<16xf64, #tile>
      }
      %c = alloc() : memref<16xf64, #tile>
      %d = affine.load %c[0] : memref<16xf64, #tile>
      return %A : memref<16xf64, #tile>
    }
    ```

    Output

    ```mlir
    func @matmul(%arg0: memref<4x4xf64>, %arg1: index, %arg2: memref<16xf64>)
      -> memref<4x4xf64> {
      affine.for %arg3 = 0 to 16 {
        %3 = affine.load %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
        %4 = mulf %3, %3 : f64
        affine.store %4, %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
      }
      %0 = alloc() : memref<4x4xf64>
      %1 = affine.apply #map1()
      %2 = affine.load %0[0, 0] : memref<4x4xf64>
      return %arg0 : memref<4x4xf64>
    }
    ```

    Input

    ```mlir
    #linear8 = affine_map<(i, j) -> (i * 8 + j)>
    func @linearize(%arg0: memref<8x8xi32, #linear8>,
                    %arg1: memref<8x8xi32, #linear8>,
                    %arg2: memref<8x8xi32, #linear8>) {
      %c8 = constant 8 : index
      %c0 = constant 0 : index
      %c1 = constant 1 : index
      affine.for %arg3 = %c0 to %c8 {
        affine.for %arg4 = %c0 to %c8 {
          affine.for %arg5 = %c0 to %c8 {
            %0 = affine.load %arg0[%arg3, %arg5] : memref<8x8xi32, #linear8>
            %1 = affine.load %arg1[%arg5, %arg4] : memref<8x8xi32, #linear8>
            %2 = affine.load %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
            %3 = muli %0, %1 : i32
            %4 = addi %2, %3 : i32
            affine.store %4, %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
          }
        }
      }
      return
    }
    ```

    Output

    ```mlir
    func @linearize(%arg0: memref<64xi32>,
                    %arg1: memref<64xi32>,
                    %arg2: memref<64xi32>) {
      %c8 = constant 8 : index
      %c0 = constant 0 : index
      affine.for %arg3 = %c0 to %c8 {
        affine.for %arg4 = %c0 to %c8 {
          affine.for %arg5 = %c0 to %c8 {
            %0 = affine.load %arg0[%arg3 * 8 + %arg5] : memref<64xi32>
            %1 = affine.load %arg1[%arg5 * 8 + %arg4] : memref<64xi32>
            %2 = affine.load %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
            %3 = muli %0, %1 : i32
            %4 = addi %2, %3 : i32
            affine.store %4, %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
          }
        }
      }
      return
    }
    ```
  }];
  let constructor = "mlir::createNormalizeMemRefsPass()";
  let dependentDialects = ["AffineDialect"];
}

def ParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
  let summary = "Collapse parallel loops to use fewer induction variables";
  let constructor = "mlir::createParallelLoopCollapsingPass()";
  let options = [
    ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
               "Which loop indices to combine into the position 0 loop index",
               "llvm::cl::MiscFlags::CommaSeparated">,
    ListOption<"clCollapsedIndices1", "collapsed-indices-1", "unsigned",
               "Which loop indices to combine into the position 1 loop index",
               "llvm::cl::MiscFlags::CommaSeparated">,
    ListOption<"clCollapsedIndices2", "collapsed-indices-2", "unsigned",
               "Which loop indices to combine into the position 2 loop index",
               "llvm::cl::MiscFlags::CommaSeparated">,
  ];
}

def PrintCFG : FunctionPass<"print-cfg-graph"> {
  let summary = "Print CFG graph per-Region";
  let constructor = "mlir::createPrintCFGGraphPass()";
}

def PrintOpStats : Pass<"print-op-stats"> {
  let summary = "Print statistics of operations";
  let constructor = "mlir::createPrintOpStatsPass()";
}

def SCCP : Pass<"sccp"> {
  let summary = "Sparse Conditional Constant Propagation";
  let description = [{
    This pass implements a general algorithm for sparse conditional constant
    propagation. This algorithm detects values that are known to be constant and
    optimistically propagates this throughout the IR. Any values proven to be
    constant are replaced, and removed if possible.

    This implementation is based on the algorithm described by Wegman and Zadeck
    in [“Constant Propagation with Conditional Branches”](https://dl.acm.org/doi/10.1145/103135.103136) (1991).
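
    For example (a minimal sketch):

    ```mlir
    // Input: the block argument %arg always receives the constant 1.
    func @sccp_example() -> i32 {
      %c1 = constant 1 : i32
      br ^bb1(%c1 : i32)
    ^bb1(%arg: i32):
      %0 = addi %arg, %arg : i32
      return %0 : i32
    }

    // Possible output: the block argument is proven constant and the
    // addition folds to a constant 2.
    func @sccp_example() -> i32 {
      %c1 = constant 1 : i32
      br ^bb1(%c1 : i32)
    ^bb1(%arg: i32):
      %c2_i32 = constant 2 : i32
      return %c2_i32 : i32
    }
    ```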
  }];
  let constructor = "mlir::createSCCPPass()";
}

def StripDebugInfo : Pass<"strip-debuginfo"> {
  let summary = "Strip debug info from all operations";
  let description = [{
    This pass strips the IR of any location information, by replacing all
    operation locations with [`unknown`](Diagnostics.md#unknown-location).
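
    For example (following the notation of the `snapshot-op-locations`
    examples above):

    ```mlir
    // old:
    ... loc("original_source.cpp":1:1)

    // new:
    ... loc(unknown)
    ```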
  }];
  let constructor = "mlir::createStripDebugInfoPass()";
}

def SymbolDCE : Pass<"symbol-dce"> {
  let summary = "Eliminate dead symbols";
  let description = [{
    This pass deletes all symbols that are found to be unreachable. This is done
    by computing the set of operations that are known to be live, propagating
    that liveness to other symbols, and then deleting all symbols that are not
    within this live set. Live symbols are those that have a
    [visibility](SymbolsAndSymbolTables.md#symbol-visibility) that extends
    beyond the IR, e.g. `public`, or those that are referenced by live symbols
    or other non-Symbol operations.

    For example, consider the following input:

    ```mlir
    func private @dead_private_function()
    func private @live_private_function()

    // Note: The `public` isn't necessary here, as this is the default.
    func public @public_function() {
      "foo.return"() {uses = [@live_private_function]} : () -> ()
    }
    ```

    A known live function, `public_function`, contains a reference to an
    otherwise non-live function `live_private_function`. After running
    `symbol-dce`, only these two symbols should remain, as the final symbol
    `dead_private_function` is not visible outside of the current IR and there
    are no links to known-live operations. After running the pass, we get the
    expected output:

    ```mlir
    func private @live_private_function()

    func public @public_function() {
      "foo.return"() {uses = [@live_private_function]} : () -> ()
    }
    ```

    See [Symbols and SymbolTables](SymbolsAndSymbolTables.md) for more
    information on `Symbols`.
  }];
  let constructor = "mlir::createSymbolDCEPass()";
}

def ViewOpGraphPass : Pass<"view-op-graph", "ModuleOp"> {
  let summary = "Print graphviz view of module";
  let description = [{
    This pass prints a Graphviz graph per block of a module.

    - Ops are represented as nodes;
    - Uses are represented as edges.
  }];
  let constructor = "mlir::createPrintOpGraphPass()";
  let options = [
    Option<"title", "title", "std::string",
           /*default=*/"", "The prefix of the title of the graph">,
    Option<"shortNames", "short-names", "bool", /*default=*/"false",
           "Use short names">
  ];
}

#endif // MLIR_TRANSFORMS_PASSES