1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass looks for instructions that can be replaced by a Test Data Class
10 // instruction, and replaces them when profitable.
11 //
12 // Roughly, the following rules are recognized:
13 //
14 // 1: fcmp pred X, 0 -> tdc X, mask
15 // 2: fcmp pred X, +-inf -> tdc X, mask
16 // 3: fcmp pred X, +-minnorm -> tdc X, mask
17 // 4: tdc (fabs X), mask -> tdc X, newmask
18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
24 //
25 // The pass works in 4 steps:
26 //
27 // 1. All fcmp and icmp instructions in a function are checked for a match
28 //    with rules 1-3 and 5-7.  Their TDC equivalents are stored in
29 //    the ConvertedInsts mapping.  If the operand of a fcmp instruction is
30 //    a fabs, it's also folded according to rule 4.
31 // 2. All and/or/xor i1 instructions whose both operands have been already
32 //    mapped are mapped according to rules 8-10.  LogicOpsWorklist is used
33 //    as a queue of instructions to check.
34 // 3. All mapped instructions that are considered worthy of conversion (ie.
35 //    replacing them will actually simplify the final code) are replaced
36 //    with a call to the s390.tdc intrinsic.
37 // 4. All intermediate results of replaced instructions are removed if unused.
38 //
39 // Instructions that match rules 1-3 are considered unworthy of conversion
40 // on their own (since a comparison instruction is superior), but are mapped
41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
42 // the original comparison in the process).
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "SystemZ.h"
47 #include "SystemZSubtarget.h"
48 #include "llvm/ADT/MapVector.h"
49 #include "llvm/CodeGen/TargetPassConfig.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/IRBuilder.h"
52 #include "llvm/IR/InstIterator.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/IntrinsicsS390.h"
56 #include "llvm/IR/LegacyPassManager.h"
57 #include "llvm/IR/Module.h"
58 #include "llvm/Target/TargetMachine.h"
59 #include <deque>
60 #include <set>
61 
62 using namespace llvm;
63 
64 namespace {
65 
66 class SystemZTDCPass : public FunctionPass {
67 public:
68   static char ID;
69   SystemZTDCPass() : FunctionPass(ID) {
70     initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
71   }
72 
73   bool runOnFunction(Function &F) override;
74 
75   void getAnalysisUsage(AnalysisUsage &AU) const override {
76     AU.addRequired<TargetPassConfig>();
77  }
78 
79 private:
80   // Maps seen instructions that can be mapped to a TDC, values are
81   // (TDC operand, TDC mask, worthy flag) triples.
82   MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
83   // The queue of and/or/xor i1 instructions to be potentially folded.
84   std::vector<BinaryOperator *> LogicOpsWorklist;
85   // Instructions matched while folding, to be removed at the end if unused.
86   std::set<Instruction *> PossibleJunk;
87 
88   // Tries to convert a fcmp instruction.
89   void convertFCmp(CmpInst &I);
90 
91   // Tries to convert an icmp instruction.
92   void convertICmp(CmpInst &I);
93 
94   // Tries to convert an i1 and/or/xor instruction, whose both operands
95   // have been already converted.
96   void convertLogicOp(BinaryOperator &I);
97 
98   // Marks an instruction as converted - adds it to ConvertedInsts and adds
99   // any and/or/xor i1 users to the queue.
100   void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
101     ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
102     auto &M = *I->getFunction()->getParent();
103     auto &Ctx = M.getContext();
104     for (auto *U : I->users()) {
105       auto *LI = dyn_cast<BinaryOperator>(U);
106       if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
107           (LI->getOpcode() == Instruction::And ||
108            LI->getOpcode() == Instruction::Or ||
109            LI->getOpcode() == Instruction::Xor)) {
110         LogicOpsWorklist.push_back(LI);
111       }
112     }
113   }
114 };
115 
116 } // end anonymous namespace
117 
118 char SystemZTDCPass::ID = 0;
119 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
120                 "SystemZ Test Data Class optimization", false, false)
121 
122 FunctionPass *llvm::createSystemZTDCPass() {
123   return new SystemZTDCPass();
124 }
125 
126 void SystemZTDCPass::convertFCmp(CmpInst &I) {
127   Value *Op0 = I.getOperand(0);
128   auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
129   auto Pred = I.getPredicate();
130   // Only comparisons with consts are interesting.
131   if (!Const)
132     return;
133   // Compute the smallest normal number (and its negation).
134   auto &Sem = Op0->getType()->getFltSemantics();
135   APFloat Smallest = APFloat::getSmallestNormalized(Sem);
136   APFloat NegSmallest = Smallest;
137   NegSmallest.changeSign();
138   // Check if Const is one of our recognized consts.
139   int WhichConst;
140   if (Const->isZero()) {
141     // All comparisons with 0 can be converted.
142     WhichConst = 0;
143   } else if (Const->isInfinity()) {
144     // Likewise for infinities.
145     WhichConst = Const->isNegative() ? 2 : 1;
146   } else if (Const->isExactlyValue(Smallest)) {
147     // For Smallest, we cannot do EQ separately from GT.
148     if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
149         (Pred & CmpInst::FCMP_OGE) != 0)
150       return;
151     WhichConst = 3;
152   } else if (Const->isExactlyValue(NegSmallest)) {
153     // Likewise for NegSmallest, we cannot do EQ separately from LT.
154     if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
155         (Pred & CmpInst::FCMP_OLE) != 0)
156       return;
157     WhichConst = 4;
158   } else {
159     // Not one of our special constants.
160     return;
161   }
162   // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
163   static const int Masks[][4] = {
164     { // 0
165       SystemZ::TDCMASK_ZERO,              // eq
166       SystemZ::TDCMASK_POSITIVE,          // gt
167       SystemZ::TDCMASK_NEGATIVE,          // lt
168       SystemZ::TDCMASK_NAN,               // un
169     },
170     { // inf
171       SystemZ::TDCMASK_INFINITY_PLUS,     // eq
172       0,                                  // gt
173       (SystemZ::TDCMASK_ZERO |
174        SystemZ::TDCMASK_NEGATIVE |
175        SystemZ::TDCMASK_NORMAL_PLUS |
176        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
177       SystemZ::TDCMASK_NAN,               // un
178     },
179     { // -inf
180       SystemZ::TDCMASK_INFINITY_MINUS,    // eq
181       (SystemZ::TDCMASK_ZERO |
182        SystemZ::TDCMASK_POSITIVE |
183        SystemZ::TDCMASK_NORMAL_MINUS |
184        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
185       0,                                  // lt
186       SystemZ::TDCMASK_NAN,               // un
187     },
188     { // minnorm
189       0,                                  // eq (unsupported)
190       (SystemZ::TDCMASK_NORMAL_PLUS |
191        SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
192       (SystemZ::TDCMASK_ZERO |
193        SystemZ::TDCMASK_NEGATIVE |
194        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
195       SystemZ::TDCMASK_NAN,               // un
196     },
197     { // -minnorm
198       0,                                  // eq (unsupported)
199       (SystemZ::TDCMASK_ZERO |
200        SystemZ::TDCMASK_POSITIVE |
201        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
202       (SystemZ::TDCMASK_NORMAL_MINUS |
203        SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
204       SystemZ::TDCMASK_NAN,               // un
205     }
206   };
207   // Construct the mask as a combination of the partial masks.
208   int Mask = 0;
209   if (Pred & CmpInst::FCMP_OEQ)
210     Mask |= Masks[WhichConst][0];
211   if (Pred & CmpInst::FCMP_OGT)
212     Mask |= Masks[WhichConst][1];
213   if (Pred & CmpInst::FCMP_OLT)
214     Mask |= Masks[WhichConst][2];
215   if (Pred & CmpInst::FCMP_UNO)
216     Mask |= Masks[WhichConst][3];
217   // A lone fcmp is unworthy of tdc conversion on its own, but may become
218   // worthy if combined with fabs.
219   bool Worthy = false;
220   if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
221     Function *F = CI->getCalledFunction();
222     if (F && F->getIntrinsicID() == Intrinsic::fabs) {
223       // Fold with fabs - adjust the mask appropriately.
224       Mask &= SystemZ::TDCMASK_PLUS;
225       Mask |= Mask >> 1;
226       Op0 = CI->getArgOperand(0);
227       // A combination of fcmp with fabs is a win, unless the constant
228       // involved is 0 (which is handled by later passes).
229       Worthy = WhichConst != 0;
230       PossibleJunk.insert(CI);
231     }
232   }
233   converted(&I, Op0, Mask, Worthy);
234 }
235 
236 void SystemZTDCPass::convertICmp(CmpInst &I) {
237   Value *Op0 = I.getOperand(0);
238   auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
239   auto Pred = I.getPredicate();
240   // All our icmp rules involve comparisons with consts.
241   if (!Const)
242     return;
243   if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
244     // Check for icmp+bitcast used for signbit.
245     if (!Cast->getSrcTy()->isFloatTy() &&
246         !Cast->getSrcTy()->isDoubleTy() &&
247         !Cast->getSrcTy()->isFP128Ty())
248       return;
249     Value *V = Cast->getOperand(0);
250     int Mask;
251     if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
252       // icmp slt (bitcast X), 0 - set if sign bit true
253       Mask = SystemZ::TDCMASK_MINUS;
254     } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
255       // icmp sgt (bitcast X), -1 - set if sign bit false
256       Mask = SystemZ::TDCMASK_PLUS;
257     } else {
258       // Not a sign bit check.
259       return;
260     }
261     PossibleJunk.insert(Cast);
262     converted(&I, V, Mask, true);
263   } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
264     // Check if this is a pre-existing call of our tdc intrinsic.
265     Function *F = CI->getCalledFunction();
266     if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
267       return;
268     if (!Const->isZero())
269       return;
270     Value *V = CI->getArgOperand(0);
271     auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
272     // Bail if the mask is not a constant.
273     if (!MaskC)
274       return;
275     int Mask = MaskC->getZExtValue();
276     Mask &= SystemZ::TDCMASK_ALL;
277     if (Pred == CmpInst::ICMP_NE) {
278       // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
279     } else if (Pred == CmpInst::ICMP_EQ) {
280       // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
281       Mask ^= SystemZ::TDCMASK_ALL;
282     } else {
283       // An unknown comparison - ignore.
284       return;
285     }
286     PossibleJunk.insert(CI);
287     converted(&I, V, Mask, false);
288   }
289 }
290 
291 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
292   Value *Op0, *Op1;
293   int Mask0, Mask1;
294   bool Worthy0, Worthy1;
295   std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
296   std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
297   if (Op0 != Op1)
298     return;
299   int Mask;
300   switch (I.getOpcode()) {
301     case Instruction::And:
302       Mask = Mask0 & Mask1;
303       break;
304     case Instruction::Or:
305       Mask = Mask0 | Mask1;
306       break;
307     case Instruction::Xor:
308       Mask = Mask0 ^ Mask1;
309       break;
310     default:
311       llvm_unreachable("Unknown op in convertLogicOp");
312   }
313   converted(&I, Op0, Mask, true);
314 }
315 
316 bool SystemZTDCPass::runOnFunction(Function &F) {
317   auto &TPC = getAnalysis<TargetPassConfig>();
318   if (TPC.getTM<TargetMachine>()
319           .getSubtarget<SystemZSubtarget>(F)
320           .hasSoftFloat())
321     return false;
322 
323   ConvertedInsts.clear();
324   LogicOpsWorklist.clear();
325   PossibleJunk.clear();
326 
327   // Look for icmp+fcmp instructions.
328   for (auto &I : instructions(F)) {
329     if (I.getOpcode() == Instruction::FCmp)
330       convertFCmp(cast<CmpInst>(I));
331     else if (I.getOpcode() == Instruction::ICmp)
332       convertICmp(cast<CmpInst>(I));
333   }
334 
335   // If none found, bail already.
336   if (ConvertedInsts.empty())
337     return false;
338 
339   // Process the queue of logic instructions.
340   while (!LogicOpsWorklist.empty()) {
341     BinaryOperator *Op = LogicOpsWorklist.back();
342     LogicOpsWorklist.pop_back();
343     // If both operands mapped, and the instruction itself not yet mapped,
344     // convert it.
345     if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
346         ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
347         !ConvertedInsts.count(Op))
348       convertLogicOp(*Op);
349   }
350 
351   // Time to actually replace the instructions.  Do it in the reverse order
352   // of finding them, since there's a good chance the earlier ones will be
353   // unused (due to being folded into later ones).
354   Module &M = *F.getParent();
355   auto &Ctx = M.getContext();
356   Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
357   bool MadeChange = false;
358   for (auto &It : reverse(ConvertedInsts)) {
359     Instruction *I = It.first;
360     Value *V;
361     int Mask;
362     bool Worthy;
363     std::tie(V, Mask, Worthy) = It.second;
364     if (!I->user_empty()) {
365       // If used and unworthy of conversion, skip it.
366       if (!Worthy)
367         continue;
368       // Call the intrinsic, compare result with 0.
369       Function *TDCFunc =
370           Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType());
371       IRBuilder<> IRB(I);
372       Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
373       Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
374       Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
375       I->replaceAllUsesWith(ICmp);
376     }
377     // If unused, or used and converted, remove it.
378     I->eraseFromParent();
379     MadeChange = true;
380   }
381 
382   if (!MadeChange)
383     return false;
384 
385   // We've actually done something - now clear misc accumulated junk (fabs,
386   // bitcast).
387   for (auto *I : PossibleJunk)
388     if (I->user_empty())
389       I->eraseFromParent();
390 
391   return true;
392 }
393