1 //===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The QPX vector registers overlay the scalar floating-point registers, and
10 // any scalar floating-point loads splat their value across all vector lanes.
11 // Thus, if we have a scalar load followed by a splat, we can remove the splat
12 // (i.e. replace the load with a load-and-splat pseudo instruction).
13 //
14 // This pass must run after anything that might do store-to-load forwarding.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "PPC.h"
19 #include "PPCInstrBuilder.h"
20 #include "PPCInstrInfo.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Target/TargetMachine.h"
27 using namespace llvm;
28
29 #define DEBUG_TYPE "ppc-qpx-load-splat"
30
31 STATISTIC(NumSimplified, "Number of QPX load splats simplified");
32
33 namespace {
34 struct PPCQPXLoadSplat : public MachineFunctionPass {
35 static char ID;
PPCQPXLoadSplat__anoncaf9e4830111::PPCQPXLoadSplat36 PPCQPXLoadSplat() : MachineFunctionPass(ID) {
37 initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
38 }
39
40 bool runOnMachineFunction(MachineFunction &Fn) override;
41
getPassName__anoncaf9e4830111::PPCQPXLoadSplat42 StringRef getPassName() const override {
43 return "PowerPC QPX Load Splat Simplification";
44 }
45 };
46 char PPCQPXLoadSplat::ID = 0;
47 }
48
// Pass registration boilerplate for PPCQPXLoadSplat. The first string matches
// DEBUG_TYPE and the second matches the text returned by getPassName().
// NOTE(review): this macro presumably expands to the definition of
// initializePPCQPXLoadSplatPass(), which the pass constructor calls, and the
// two trailing `false` arguments are presumably its "CFG only" and
// "is analysis" flags -- confirm against the macro's definition.
INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
                "PowerPC QPX Load Splat Simplification",
                false, false)
52
createPPCQPXLoadSplatPass()53 FunctionPass *llvm::createPPCQPXLoadSplatPass() {
54 return new PPCQPXLoadSplat();
55 }
56
runOnMachineFunction(MachineFunction & MF)57 bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
58 if (skipFunction(MF.getFunction()))
59 return false;
60
61 bool MadeChange = false;
62 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
63
64 for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
65 MachineBasicBlock *MBB = &*MFI;
66 SmallVector<MachineInstr *, 4> Splats;
67
68 for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
69 MachineInstr *MI = &*MBBI;
70
71 if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
72 Splats.clear();
73 continue;
74 }
75
76 // We're looking for a sequence like this:
77 // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
78 // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
79
80 for (auto SI = Splats.begin(); SI != Splats.end();) {
81 MachineInstr *SMI = *SI;
82 Register SplatReg = SMI->getOperand(0).getReg();
83 Register SrcReg = SMI->getOperand(1).getReg();
84
85 if (MI->modifiesRegister(SrcReg, TRI)) {
86 switch (MI->getOpcode()) {
87 default:
88 SI = Splats.erase(SI);
89 continue;
90 case PPC::LFS:
91 case PPC::LFD:
92 case PPC::LFSU:
93 case PPC::LFDU:
94 case PPC::LFSUX:
95 case PPC::LFDUX:
96 case PPC::LFSX:
97 case PPC::LFDX:
98 case PPC::LFIWAX:
99 case PPC::LFIWZX:
100 if (SplatReg != SrcReg) {
101 // We need to change the load to define the scalar subregister of
102 // the QPX splat source register.
103 unsigned SubRegIndex =
104 TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
105 Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
106
107 // Substitute both the explicit defined register, and also the
108 // implicit def of the containing QPX register.
109 MI->getOperand(0).setReg(SplatSubReg);
110 MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
111 }
112
113 SI = Splats.erase(SI);
114
115 // If SMI is directly after MI, then MBBI's base iterator is
116 // pointing at SMI. Adjust MBBI around the call to erase SMI to
117 // avoid invalidating MBBI.
118 ++MBBI;
119 SMI->eraseFromParent();
120 --MBBI;
121
122 ++NumSimplified;
123 MadeChange = true;
124 continue;
125 }
126 }
127
128 // If this instruction defines the splat register, then we cannot move
129 // the previous definition above it. If it reads from the splat
130 // register, then it must already be alive from some previous
131 // definition, and if the splat register is different from the source
132 // register, then this definition must not be the load for which we're
133 // searching.
134 if (MI->modifiesRegister(SplatReg, TRI) ||
135 (SrcReg != SplatReg &&
136 MI->readsRegister(SplatReg, TRI))) {
137 SI = Splats.erase(SI);
138 continue;
139 }
140
141 ++SI;
142 }
143
144 if (MI->getOpcode() != PPC::QVESPLATI &&
145 MI->getOpcode() != PPC::QVESPLATIs &&
146 MI->getOpcode() != PPC::QVESPLATIb)
147 continue;
148 if (MI->getOperand(2).getImm() != 0)
149 continue;
150
151 // If there are other uses of the scalar value after this, replacing
152 // those uses might be non-trivial.
153 if (!MI->getOperand(1).isKill())
154 continue;
155
156 Splats.push_back(MI);
157 }
158 }
159
160 return MadeChange;
161 }
162