1 //===- StdExpandDivs.cpp - Code to prepare Std for lowering Divs to LLVM -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
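// This file implements Std transformations that expand operations to help
// with the lowering to LLVM. Currently implemented transformations are the
// expansion of `atomic_rmw` with "maxf"/"minf" kinds into
// `generic_atomic_rmw`, the expansion of `memref_reshape` with a statically
// shaped shape operand into `memref_reinterpret_cast`, and the expansion of
// Ceil and Floor divisions for Signed Integers.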
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "PassDetail.h"
16 #include "mlir/Dialect/StandardOps/IR/Ops.h"
17 #include "mlir/Dialect/StandardOps/Transforms/Passes.h"
18 #include "mlir/IR/PatternMatch.h"
19
20 using namespace mlir;
21
22 namespace {
23
24 /// Converts `atomic_rmw` that cannot be lowered to a simple atomic op with
25 /// AtomicRMWOpLowering pattern, e.g. with "minf" or "maxf" attributes, to
26 /// `generic_atomic_rmw` with the expanded code.
27 ///
28 /// %x = atomic_rmw "maxf" %fval, %F[%i] : (f32, memref<10xf32>) -> f32
29 ///
30 /// will be lowered to
31 ///
32 /// %x = std.generic_atomic_rmw %F[%i] : memref<10xf32> {
33 /// ^bb0(%current: f32):
34 /// %cmp = cmpf "ogt", %current, %fval : f32
35 /// %new_value = select %cmp, %current, %fval : f32
36 /// atomic_yield %new_value : f32
37 /// }
38 struct AtomicRMWOpConverter : public OpRewritePattern<AtomicRMWOp> {
39 public:
40 using OpRewritePattern::OpRewritePattern;
41
matchAndRewrite__anon541ce2fc0111::AtomicRMWOpConverter42 LogicalResult matchAndRewrite(AtomicRMWOp op,
43 PatternRewriter &rewriter) const final {
44 CmpFPredicate predicate;
45 switch (op.kind()) {
46 case AtomicRMWKind::maxf:
47 predicate = CmpFPredicate::OGT;
48 break;
49 case AtomicRMWKind::minf:
50 predicate = CmpFPredicate::OLT;
51 break;
52 default:
53 return failure();
54 }
55
56 auto loc = op.getLoc();
57 auto genericOp =
58 rewriter.create<GenericAtomicRMWOp>(loc, op.memref(), op.indices());
59 OpBuilder bodyBuilder =
60 OpBuilder::atBlockEnd(genericOp.getBody(), rewriter.getListener());
61
62 Value lhs = genericOp.getCurrentValue();
63 Value rhs = op.value();
64 Value cmp = bodyBuilder.create<CmpFOp>(loc, predicate, lhs, rhs);
65 Value select = bodyBuilder.create<SelectOp>(loc, cmp, lhs, rhs);
66 bodyBuilder.create<AtomicYieldOp>(loc, select);
67
68 rewriter.replaceOp(op, genericOp.getResult());
69 return success();
70 }
71 };
72
73 /// Converts `memref_reshape` that has a target shape of a statically-known
74 /// size to `memref_reinterpret_cast`.
75 struct MemRefReshapeOpConverter : public OpRewritePattern<MemRefReshapeOp> {
76 public:
77 using OpRewritePattern::OpRewritePattern;
78
matchAndRewrite__anon541ce2fc0111::MemRefReshapeOpConverter79 LogicalResult matchAndRewrite(MemRefReshapeOp op,
80 PatternRewriter &rewriter) const final {
81 auto shapeType = op.shape().getType().cast<MemRefType>();
82 if (!shapeType.hasStaticShape())
83 return failure();
84
85 int64_t rank = shapeType.cast<MemRefType>().getDimSize(0);
86 SmallVector<OpFoldResult, 4> sizes, strides;
87 sizes.resize(rank);
88 strides.resize(rank);
89
90 Location loc = op.getLoc();
91 Value stride = rewriter.create<ConstantIndexOp>(loc, 1);
92 for (int i = rank - 1; i >= 0; --i) {
93 Value index = rewriter.create<ConstantIndexOp>(loc, i);
94 Value size = rewriter.create<LoadOp>(loc, op.shape(), index);
95 if (!size.getType().isa<IndexType>())
96 size = rewriter.create<IndexCastOp>(loc, size, rewriter.getIndexType());
97 sizes[i] = size;
98 strides[i] = stride;
99 if (i > 0)
100 stride = rewriter.create<MulIOp>(loc, stride, size);
101 }
102 rewriter.replaceOpWithNewOp<MemRefReinterpretCastOp>(
103 op, op.getType(), op.source(), /*offset=*/rewriter.getIndexAttr(0),
104 sizes, strides);
105 return success();
106 }
107 };
108
109 /// Expands SignedCeilDivIOP (n, m) into
110 /// 1) x = (m > 0) ? -1 : 1
111 /// 2) (n*m>0) ? ((n+x) / m) + 1 : - (-n / m)
112 struct SignedCeilDivIOpConverter : public OpRewritePattern<SignedCeilDivIOp> {
113 public:
114 using OpRewritePattern::OpRewritePattern;
matchAndRewrite__anon541ce2fc0111::SignedCeilDivIOpConverter115 LogicalResult matchAndRewrite(SignedCeilDivIOp op,
116 PatternRewriter &rewriter) const final {
117 Location loc = op.getLoc();
118 SignedCeilDivIOp signedCeilDivIOp = cast<SignedCeilDivIOp>(op);
119 Type type = signedCeilDivIOp.getType();
120 Value a = signedCeilDivIOp.lhs();
121 Value b = signedCeilDivIOp.rhs();
122 Value plusOne =
123 rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 1));
124 Value zero =
125 rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 0));
126 Value minusOne =
127 rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, -1));
128 // Compute x = (b>0) ? -1 : 1.
129 Value compare = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, b, zero);
130 Value x = rewriter.create<SelectOp>(loc, compare, minusOne, plusOne);
131 // Compute positive res: 1 + ((x+a)/b).
132 Value xPlusA = rewriter.create<AddIOp>(loc, x, a);
133 Value xPlusADivB = rewriter.create<SignedDivIOp>(loc, xPlusA, b);
134 Value posRes = rewriter.create<AddIOp>(loc, plusOne, xPlusADivB);
135 // Compute negative res: - ((-a)/b).
136 Value minusA = rewriter.create<SubIOp>(loc, zero, a);
137 Value minusADivB = rewriter.create<SignedDivIOp>(loc, minusA, b);
138 Value negRes = rewriter.create<SubIOp>(loc, zero, minusADivB);
139 // Result is (a*b>0) ? pos result : neg result.
140 // Note, we want to avoid using a*b because of possible overflow.
141 // The case that matters are a>0, a==0, a<0, b>0 and b<0. We do
142 // not particuliarly care if a*b<0 is true or false when b is zero
143 // as this will result in an illegal divide. So `a*b<0` can be reformulated
144 // as `(a<0 && b<0) || (a>0 && b>0)' or `(a<0 && b<0) || (a>0 && b>=0)'.
145 // We pick the first expression here.
146 Value aNeg = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, a, zero);
147 Value aPos = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, a, zero);
148 Value bNeg = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, b, zero);
149 Value bPos = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, b, zero);
150 Value firstTerm = rewriter.create<AndOp>(loc, aNeg, bNeg);
151 Value secondTerm = rewriter.create<AndOp>(loc, aPos, bPos);
152 Value compareRes = rewriter.create<OrOp>(loc, firstTerm, secondTerm);
153 Value res = rewriter.create<SelectOp>(loc, compareRes, posRes, negRes);
154 // Perform substitution and return success.
155 rewriter.replaceOp(op, {res});
156 return success();
157 }
158 };
159
160 /// Expands SignedFloorDivIOP (n, m) into
161 /// 1) x = (m<0) ? 1 : -1
162 /// 2) return (n*m<0) ? - ((-n+x) / m) -1 : n / m
163 struct SignedFloorDivIOpConverter : public OpRewritePattern<SignedFloorDivIOp> {
164 public:
165 using OpRewritePattern::OpRewritePattern;
matchAndRewrite__anon541ce2fc0111::SignedFloorDivIOpConverter166 LogicalResult matchAndRewrite(SignedFloorDivIOp op,
167 PatternRewriter &rewriter) const final {
168 Location loc = op.getLoc();
169 SignedFloorDivIOp signedFloorDivIOp = cast<SignedFloorDivIOp>(op);
170 Type type = signedFloorDivIOp.getType();
171 Value a = signedFloorDivIOp.lhs();
172 Value b = signedFloorDivIOp.rhs();
173 Value plusOne =
174 rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 1));
175 Value zero =
176 rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, 0));
177 Value minusOne =
178 rewriter.create<ConstantOp>(loc, rewriter.getIntegerAttr(type, -1));
179 // Compute x = (b<0) ? 1 : -1.
180 Value compare = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, b, zero);
181 Value x = rewriter.create<SelectOp>(loc, compare, plusOne, minusOne);
182 // Compute negative res: -1 - ((x-a)/b).
183 Value xMinusA = rewriter.create<SubIOp>(loc, x, a);
184 Value xMinusADivB = rewriter.create<SignedDivIOp>(loc, xMinusA, b);
185 Value negRes = rewriter.create<SubIOp>(loc, minusOne, xMinusADivB);
186 // Compute positive res: a/b.
187 Value posRes = rewriter.create<SignedDivIOp>(loc, a, b);
188 // Result is (a*b<0) ? negative result : positive result.
189 // Note, we want to avoid using a*b because of possible overflow.
190 // The case that matters are a>0, a==0, a<0, b>0 and b<0. We do
191 // not particuliarly care if a*b<0 is true or false when b is zero
192 // as this will result in an illegal divide. So `a*b<0` can be reformulated
193 // as `(a>0 && b<0) || (a>0 && b<0)' or `(a>0 && b<0) || (a>0 && b<=0)'.
194 // We pick the first expression here.
195 Value aNeg = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, a, zero);
196 Value aPos = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, a, zero);
197 Value bNeg = rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, b, zero);
198 Value bPos = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt, b, zero);
199 Value firstTerm = rewriter.create<AndOp>(loc, aNeg, bPos);
200 Value secondTerm = rewriter.create<AndOp>(loc, aPos, bNeg);
201 Value compareRes = rewriter.create<OrOp>(loc, firstTerm, secondTerm);
202 Value res = rewriter.create<SelectOp>(loc, compareRes, negRes, posRes);
203 // Perform substitution and return success.
204 rewriter.replaceOp(op, {res});
205 return success();
206 }
207 };
208
209 struct StdExpandOpsPass : public StdExpandOpsBase<StdExpandOpsPass> {
runOnFunction__anon541ce2fc0111::StdExpandOpsPass210 void runOnFunction() override {
211 MLIRContext &ctx = getContext();
212
213 OwningRewritePatternList patterns;
214 populateStdExpandOpsPatterns(&ctx, patterns);
215
216 ConversionTarget target(getContext());
217
218 target.addLegalDialect<StandardOpsDialect>();
219 target.addDynamicallyLegalOp<AtomicRMWOp>([](AtomicRMWOp op) {
220 return op.kind() != AtomicRMWKind::maxf &&
221 op.kind() != AtomicRMWKind::minf;
222 });
223 target.addDynamicallyLegalOp<MemRefReshapeOp>([](MemRefReshapeOp op) {
224 return !op.shape().getType().cast<MemRefType>().hasStaticShape();
225 });
226 target.addIllegalOp<SignedCeilDivIOp>();
227 target.addIllegalOp<SignedFloorDivIOp>();
228 if (failed(
229 applyPartialConversion(getFunction(), target, std::move(patterns))))
230 signalPassFailure();
231 }
232 };
233
234 } // namespace
235
populateStdExpandOpsPatterns(MLIRContext * context,OwningRewritePatternList & patterns)236 void mlir::populateStdExpandOpsPatterns(MLIRContext *context,
237 OwningRewritePatternList &patterns) {
238 patterns.insert<AtomicRMWOpConverter, MemRefReshapeOpConverter,
239 SignedCeilDivIOpConverter, SignedFloorDivIOpConverter>(
240 context);
241 }
242
createStdExpandOpsPass()243 std::unique_ptr<Pass> mlir::createStdExpandOpsPass() {
244 return std::make_unique<StdExpandOpsPass>();
245 }
246