1 //===- AffineParallelize.cpp - Affineparallelize Pass---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a parallelizer for affine loop nests that is able to
10 // perform inner or outer loop parallelization.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PassDetail.h"
15 #include "mlir/Analysis/AffineAnalysis.h"
16 #include "mlir/Analysis/AffineStructures.h"
17 #include "mlir/Analysis/LoopAnalysis.h"
18 #include "mlir/Analysis/Utils.h"
19 #include "mlir/Dialect/Affine/IR/AffineOps.h"
20 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
21 #include "mlir/Dialect/Affine/Passes.h"
22 #include "mlir/Dialect/Affine/Passes.h.inc"
23 #include "mlir/Dialect/Affine/Utils.h"
24 #include "mlir/Transforms/LoopUtils.h"
25 #include "llvm/Support/Debug.h"
26 #include <deque>
27 
28 #define DEBUG_TYPE "affine-parallel"
29 
30 using namespace mlir;
31 
32 namespace {
33 /// Convert all parallel affine.for op into 1-D affine.parallel op.
34 struct AffineParallelize : public AffineParallelizeBase<AffineParallelize> {
35   void runOnFunction() override;
36 };
37 
38 /// Descriptor of a potentially parallelizable loop.
39 struct ParallelizationCandidate {
ParallelizationCandidate__anon250f963c0111::ParallelizationCandidate40   ParallelizationCandidate(AffineForOp l, SmallVector<LoopReduction> &&r)
41       : loop(l), reductions(std::move(r)) {}
42 
43   /// The potentially parallelizable loop.
44   AffineForOp loop;
45   /// Desciprtors of reductions that can be parallelized in the loop.
46   SmallVector<LoopReduction> reductions;
47 };
48 } // namespace
49 
runOnFunction()50 void AffineParallelize::runOnFunction() {
51   FuncOp f = getFunction();
52 
53   // The walker proceeds in pre-order to process the outer loops first
54   // and control the number of outer parallel loops.
55   std::vector<ParallelizationCandidate> parallelizableLoops;
56   f.walk<WalkOrder::PreOrder>([&](AffineForOp loop) {
57     SmallVector<LoopReduction> reductions;
58     if (isLoopParallel(loop, parallelReductions ? &reductions : nullptr))
59       parallelizableLoops.push_back({loop, std::move(reductions)});
60   });
61 
62   for (const ParallelizationCandidate &candidate : parallelizableLoops) {
63     unsigned numParentParallelOps = 0;
64     AffineForOp loop = candidate.loop;
65     for (Operation *op = loop->getParentOp();
66          op != nullptr && !op->hasTrait<OpTrait::AffineScope>();
67          op = op->getParentOp()) {
68       if (isa<AffineParallelOp>(op))
69         ++numParentParallelOps;
70     }
71 
72     if (numParentParallelOps < maxNested) {
73       if (failed(affineParallelize(loop, candidate.reductions))) {
74         LLVM_DEBUG(llvm::dbgs() << "[" DEBUG_TYPE "] failed to parallelize\n"
75                                 << loop);
76       }
77     } else {
78       LLVM_DEBUG(llvm::dbgs() << "[" DEBUG_TYPE "] too many nested loops\n"
79                               << loop);
80     }
81   }
82 }
83 
createAffineParallelizePass()84 std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineParallelizePass() {
85   return std::make_unique<AffineParallelize>();
86 }
87