1 //===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The QPX vector registers overlay the scalar floating-point registers, and
10 // any scalar floating-point loads splat their value across all vector lanes.
11 // Thus, if we have a scalar load followed by a splat, we can remove the splat
12 // (i.e. replace the load with a load-and-splat pseudo instruction).
13 //
14 // This pass must run after anything that might do store-to-load forwarding.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "PPC.h"
19 #include "PPCInstrBuilder.h"
20 #include "PPCInstrInfo.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Target/TargetMachine.h"
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "ppc-qpx-load-splat"
30 
31 STATISTIC(NumSimplified, "Number of QPX load splats simplified");
32 
33 namespace {
34   struct PPCQPXLoadSplat : public MachineFunctionPass {
35     static char ID;
PPCQPXLoadSplat__anoncaf9e4830111::PPCQPXLoadSplat36     PPCQPXLoadSplat() : MachineFunctionPass(ID) {
37       initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
38     }
39 
40     bool runOnMachineFunction(MachineFunction &Fn) override;
41 
getPassName__anoncaf9e4830111::PPCQPXLoadSplat42     StringRef getPassName() const override {
43       return "PowerPC QPX Load Splat Simplification";
44     }
45   };
46   char PPCQPXLoadSplat::ID = 0;
47 }
48 
49 INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
50                 "PowerPC QPX Load Splat Simplification",
51                 false, false)
52 
createPPCQPXLoadSplatPass()53 FunctionPass *llvm::createPPCQPXLoadSplatPass() {
54   return new PPCQPXLoadSplat();
55 }
56 
runOnMachineFunction(MachineFunction & MF)57 bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
58   if (skipFunction(MF.getFunction()))
59     return false;
60 
61   bool MadeChange = false;
62   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
63 
64   for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
65     MachineBasicBlock *MBB = &*MFI;
66     SmallVector<MachineInstr *, 4> Splats;
67 
68     for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
69       MachineInstr *MI = &*MBBI;
70 
71       if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
72         Splats.clear();
73         continue;
74       }
75 
76       // We're looking for a sequence like this:
77       // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
78       // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
79 
80       for (auto SI = Splats.begin(); SI != Splats.end();) {
81         MachineInstr *SMI = *SI;
82         Register SplatReg = SMI->getOperand(0).getReg();
83         Register SrcReg = SMI->getOperand(1).getReg();
84 
85         if (MI->modifiesRegister(SrcReg, TRI)) {
86           switch (MI->getOpcode()) {
87           default:
88             SI = Splats.erase(SI);
89             continue;
90           case PPC::LFS:
91           case PPC::LFD:
92           case PPC::LFSU:
93           case PPC::LFDU:
94           case PPC::LFSUX:
95           case PPC::LFDUX:
96           case PPC::LFSX:
97           case PPC::LFDX:
98           case PPC::LFIWAX:
99           case PPC::LFIWZX:
100             if (SplatReg != SrcReg) {
101               // We need to change the load to define the scalar subregister of
102               // the QPX splat source register.
103               unsigned SubRegIndex =
104                 TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
105               Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
106 
107               // Substitute both the explicit defined register, and also the
108               // implicit def of the containing QPX register.
109               MI->getOperand(0).setReg(SplatSubReg);
110               MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
111             }
112 
113             SI = Splats.erase(SI);
114 
115             // If SMI is directly after MI, then MBBI's base iterator is
116             // pointing at SMI.  Adjust MBBI around the call to erase SMI to
117             // avoid invalidating MBBI.
118             ++MBBI;
119             SMI->eraseFromParent();
120             --MBBI;
121 
122             ++NumSimplified;
123             MadeChange = true;
124             continue;
125           }
126         }
127 
128         // If this instruction defines the splat register, then we cannot move
129         // the previous definition above it. If it reads from the splat
130         // register, then it must already be alive from some previous
131         // definition, and if the splat register is different from the source
132         // register, then this definition must not be the load for which we're
133         // searching.
134         if (MI->modifiesRegister(SplatReg, TRI) ||
135             (SrcReg != SplatReg &&
136              MI->readsRegister(SplatReg, TRI))) {
137           SI = Splats.erase(SI);
138           continue;
139         }
140 
141         ++SI;
142       }
143 
144       if (MI->getOpcode() != PPC::QVESPLATI &&
145           MI->getOpcode() != PPC::QVESPLATIs &&
146           MI->getOpcode() != PPC::QVESPLATIb)
147         continue;
148       if (MI->getOperand(2).getImm() != 0)
149         continue;
150 
151       // If there are other uses of the scalar value after this, replacing
152       // those uses might be non-trivial.
153       if (!MI->getOperand(1).isKill())
154         continue;
155 
156       Splats.push_back(MI);
157     }
158   }
159 
160   return MadeChange;
161 }
162