1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Insert s_clause instructions to form hard clauses.
11 ///
12 /// Clausing load instructions can give cache coherency benefits. Before gfx10,
13 /// the hardware automatically detected "soft clauses", which were sequences of
14 /// memory instructions of the same type. In gfx10 this detection was removed,
15 /// and the s_clause instruction was introduced to explicitly mark "hard
16 /// clauses".
17 ///
18 /// It's the scheduler's job to form the clauses by putting similar memory
19 /// instructions next to each other. Our job is just to insert an s_clause
20 /// instruction to mark the start of each clause.
21 ///
22 /// Note that hard clauses are very similar to, but logically distinct from, the
23 /// groups of instructions that have to be restartable when XNACK is enabled.
24 /// The rules are slightly different in each case. For example an s_nop
25 /// instruction breaks a restartable group, but can appear in the middle of a
26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27 /// "soft clauses" or just "clauses".)
28 ///
29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30 /// groups, not hard clauses.
31 //
32 //===----------------------------------------------------------------------===//
33
34 #include "AMDGPU.h"
35 #include "GCNSubtarget.h"
36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37 #include "llvm/ADT/SmallVector.h"
38
39 using namespace llvm;
40
41 #define DEBUG_TYPE "si-insert-hard-clauses"
42
43 namespace {
44
// Classification of a machine instruction with respect to hard clauses.
//
// The enumerators up to and including LAST_REAL_HARDCLAUSE_TYPE name kinds of
// instruction that a clause can be made of; the enumerators after it describe
// how every other instruction interacts with a clause.
enum HardClauseType {
  // Memory instructions on texture, buffer, global or scratch memory.
  HARDCLAUSE_VMEM,
  // Flat memory instructions, excluding the global and scratch variants.
  HARDCLAUSE_FLAT,
  // Instructions accessing LDS.
  HARDCLAUSE_LDS,
  // Scalar memory instructions.
  HARDCLAUSE_SMEM,
  // VALU instructions.
  HARDCLAUSE_VALU,
  // Alias for the last "real" type, so that `Type <= LAST_REAL_HARDCLAUSE_TYPE`
  // tests whether an instruction can be a clause member.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions. Apart from s_waitcnt, these may appear in the
  // middle of a hard clause.
  HARDCLAUSE_INTERNAL,
  // Everything that may not appear in a hard clause: SALU, export, branch,
  // message, GDS, s_waitcnt and any instruction not covered above.
  HARDCLAUSE_ILLEGAL,
};
65
getHardClauseType(const MachineInstr & MI)66 HardClauseType getHardClauseType(const MachineInstr &MI) {
67 // On current architectures we only get a benefit from clausing loads.
68 if (MI.mayLoad()) {
69 if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
70 return HARDCLAUSE_VMEM;
71 if (SIInstrInfo::isFLAT(MI))
72 return HARDCLAUSE_FLAT;
73 // TODO: LDS
74 if (SIInstrInfo::isSMRD(MI))
75 return HARDCLAUSE_SMEM;
76 }
77
78 // Don't form VALU clauses. It's not clear what benefit they give, if any.
79
80 // In practice s_nop is the only internal instruction we're likely to see.
81 // It's safe to treat the rest as illegal.
82 if (MI.getOpcode() == AMDGPU::S_NOP)
83 return HARDCLAUSE_INTERNAL;
84 return HARDCLAUSE_ILLEGAL;
85 }
86
87 class SIInsertHardClauses : public MachineFunctionPass {
88 public:
89 static char ID;
90
SIInsertHardClauses()91 SIInsertHardClauses() : MachineFunctionPass(ID) {}
92
getAnalysisUsage(AnalysisUsage & AU) const93 void getAnalysisUsage(AnalysisUsage &AU) const override {
94 AU.setPreservesCFG();
95 MachineFunctionPass::getAnalysisUsage(AU);
96 }
97
98 // Track information about a clause as we discover it.
99 struct ClauseInfo {
100 // The type of all (non-internal) instructions in the clause.
101 HardClauseType Type = HARDCLAUSE_ILLEGAL;
102 // The first (necessarily non-internal) instruction in the clause.
103 MachineInstr *First = nullptr;
104 // The last non-internal instruction in the clause.
105 MachineInstr *Last = nullptr;
106 // The length of the clause including any internal instructions in the
107 // middle or after the end of the clause.
108 unsigned Length = 0;
109 // The base operands of *Last.
110 SmallVector<const MachineOperand *, 4> BaseOps;
111 };
112
emitClause(const ClauseInfo & CI,const SIInstrInfo * SII)113 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
114 // Get the size of the clause excluding any internal instructions at the
115 // end.
116 unsigned Size =
117 std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
118 if (Size < 2)
119 return false;
120 assert(Size <= 64 && "Hard clause is too long!");
121
122 auto &MBB = *CI.First->getParent();
123 auto ClauseMI =
124 BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
125 .addImm(Size - 1);
126 finalizeBundle(MBB, ClauseMI->getIterator(),
127 std::next(CI.Last->getIterator()));
128 return true;
129 }
130
runOnMachineFunction(MachineFunction & MF)131 bool runOnMachineFunction(MachineFunction &MF) override {
132 if (skipFunction(MF.getFunction()))
133 return false;
134
135 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
136 if (!ST.hasHardClauses())
137 return false;
138
139 const SIInstrInfo *SII = ST.getInstrInfo();
140 const TargetRegisterInfo *TRI = ST.getRegisterInfo();
141
142 bool Changed = false;
143 for (auto &MBB : MF) {
144 ClauseInfo CI;
145 for (auto &MI : MBB) {
146 HardClauseType Type = getHardClauseType(MI);
147
148 int64_t Dummy1;
149 bool Dummy2;
150 unsigned Dummy3;
151 SmallVector<const MachineOperand *, 4> BaseOps;
152 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
153 if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
154 Dummy3, TRI)) {
155 // We failed to get the base operands, so we'll never clause this
156 // instruction with any other, so pretend it's illegal.
157 Type = HARDCLAUSE_ILLEGAL;
158 }
159 }
160
161 if (CI.Length == 64 ||
162 (CI.Length && Type != HARDCLAUSE_INTERNAL &&
163 (Type != CI.Type ||
164 // Note that we lie to shouldClusterMemOps about the size of the
165 // cluster. When shouldClusterMemOps is called from the machine
166 // scheduler it limits the size of the cluster to avoid increasing
167 // register pressure too much, but this pass runs after register
168 // allocation so there is no need for that kind of limit.
169 !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
170 // Finish the current clause.
171 Changed |= emitClause(CI, SII);
172 CI = ClauseInfo();
173 }
174
175 if (CI.Length) {
176 // Extend the current clause.
177 ++CI.Length;
178 if (Type != HARDCLAUSE_INTERNAL) {
179 CI.Last = &MI;
180 CI.BaseOps = std::move(BaseOps);
181 }
182 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
183 // Start a new clause.
184 CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
185 }
186 }
187
188 // Finish the last clause in the basic block if any.
189 if (CI.Length)
190 Changed |= emitClause(CI, SII);
191 }
192
193 return Changed;
194 }
195 };
196
197 } // namespace
198
199 char SIInsertHardClauses::ID = 0;
200
201 char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
202
203 INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
204 false, false)
205