1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Insert s_clause instructions to form hard clauses.
11 ///
12 /// Clausing load instructions can give cache coherency benefits. Before gfx10,
13 /// the hardware automatically detected "soft clauses", which were sequences of
14 /// memory instructions of the same type. In gfx10 this detection was removed,
15 /// and the s_clause instruction was introduced to explicitly mark "hard
16 /// clauses".
17 ///
18 /// It's the scheduler's job to form the clauses by putting similar memory
19 /// instructions next to each other. Our job is just to insert an s_clause
20 /// instruction to mark the start of each clause.
21 ///
22 /// Note that hard clauses are very similar to, but logically distinct from, the
23 /// groups of instructions that have to be restartable when XNACK is enabled.
24 /// The rules are slightly different in each case. For example an s_nop
25 /// instruction breaks a restartable group, but can appear in the middle of a
26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27 /// "soft clauses" or just "clauses".)
28 ///
29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30 /// groups, not hard clauses.
31 //
32 //===----------------------------------------------------------------------===//
33 
34 #include "AMDGPU.h"
35 #include "GCNSubtarget.h"
36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/CodeGen/MachineFunctionPass.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "si-insert-hard-clauses"
43 
44 namespace {
45 
// Upper bound on the number of instructions placed in a single hard clause.
// The s_clause encoding has room for a length of 64, but the hardware
// documentation (at least for GFX11) states that 63 is the largest value
// allowed, so that is the limit used here.
constexpr unsigned MaxInstructionsInClause = 63U;
50 
// Classification of a machine instruction for the purpose of clause
// formation. The enumerator order matters: every type that is able to start a
// clause compares less than or equal to LAST_REAL_HARDCLAUSE_TYPE, and the
// remaining bookkeeping types come after it.
enum HardClauseType {
  // ---- GFX10 only ----

  // VMEM: texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM = 0,
  // FLAT memory instructions that are neither global nor scratch.
  HARDCLAUSE_FLAT,

  // ---- GFX11 only ----

  // Texture memory instructions, split by kind of access.
  HARDCLAUSE_MIMG_LOAD,
  HARDCLAUSE_MIMG_STORE,
  HARDCLAUSE_MIMG_ATOMIC,
  HARDCLAUSE_MIMG_SAMPLE,
  // Buffer, global or scratch memory instructions, split by kind of access.
  HARDCLAUSE_VMEM_LOAD,
  HARDCLAUSE_VMEM_STORE,
  HARDCLAUSE_VMEM_ATOMIC,
  // FLAT (neither global nor scratch) memory instructions, split by kind of
  // access.
  HARDCLAUSE_FLAT_LOAD,
  HARDCLAUSE_FLAT_STORE,
  HARDCLAUSE_FLAT_ATOMIC,
  // BVH instructions.
  HARDCLAUSE_BVH,

  // ---- Shared by all generations ----

  // Instructions accessing LDS.
  HARDCLAUSE_LDS,
  // Scalar memory instructions.
  HARDCLAUSE_SMEM,
  // VALU instructions.
  HARDCLAUSE_VALU,
  // Marks the end of the "real" clause types listed above.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions. Apart from s_waitcnt, these may sit in the middle
  // of a hard clause.
  HARDCLAUSE_INTERNAL,
  // Meta instructions, such as KILL, that produce no ISA at all.
  HARDCLAUSE_IGNORE,
  // Everything that must not appear in a hard clause: SALU, export, branch,
  // message, GDS, s_waitcnt and any instruction not covered above.
  HARDCLAUSE_ILLEGAL,
};
96 
97 class SIInsertHardClauses : public MachineFunctionPass {
98 public:
99   static char ID;
100   const GCNSubtarget *ST = nullptr;
101 
102   SIInsertHardClauses() : MachineFunctionPass(ID) {}
103 
104   void getAnalysisUsage(AnalysisUsage &AU) const override {
105     AU.setPreservesCFG();
106     MachineFunctionPass::getAnalysisUsage(AU);
107   }
108 
109   HardClauseType getHardClauseType(const MachineInstr &MI) {
110     if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
111       if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
112         if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
113           if (ST->hasNSAClauseBug()) {
114             const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
115             if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
116               return HARDCLAUSE_ILLEGAL;
117           }
118           return HARDCLAUSE_VMEM;
119         }
120         if (SIInstrInfo::isFLAT(MI))
121           return HARDCLAUSE_FLAT;
122       } else {
123         assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
124         if (SIInstrInfo::isMIMG(MI)) {
125           const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
126           const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
127               AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
128           if (BaseInfo->BVH)
129             return HARDCLAUSE_BVH;
130           if (BaseInfo->Sampler)
131             return HARDCLAUSE_MIMG_SAMPLE;
132           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
133                                               : HARDCLAUSE_MIMG_LOAD
134                               : HARDCLAUSE_MIMG_STORE;
135         }
136         if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
137           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
138                                               : HARDCLAUSE_VMEM_LOAD
139                               : HARDCLAUSE_VMEM_STORE;
140         }
141         if (SIInstrInfo::isFLAT(MI)) {
142           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
143                                               : HARDCLAUSE_FLAT_LOAD
144                               : HARDCLAUSE_FLAT_STORE;
145         }
146       }
147       // TODO: LDS
148       if (SIInstrInfo::isSMRD(MI))
149         return HARDCLAUSE_SMEM;
150     }
151 
152     // Don't form VALU clauses. It's not clear what benefit they give, if any.
153 
154     // In practice s_nop is the only internal instruction we're likely to see.
155     // It's safe to treat the rest as illegal.
156     if (MI.getOpcode() == AMDGPU::S_NOP)
157       return HARDCLAUSE_INTERNAL;
158     if (MI.isMetaInstruction())
159       return HARDCLAUSE_IGNORE;
160     return HARDCLAUSE_ILLEGAL;
161   }
162 
163   // Track information about a clause as we discover it.
164   struct ClauseInfo {
165     // The type of all (non-internal) instructions in the clause.
166     HardClauseType Type = HARDCLAUSE_ILLEGAL;
167     // The first (necessarily non-internal) instruction in the clause.
168     MachineInstr *First = nullptr;
169     // The last non-internal instruction in the clause.
170     MachineInstr *Last = nullptr;
171     // The length of the clause including any internal instructions in the
172     // middle (but not at the end) of the clause.
173     unsigned Length = 0;
174     // Internal instructions at the and of a clause should not be included in
175     // the clause. Count them in TrailingInternalLength until a new memory
176     // instruction is added.
177     unsigned TrailingInternalLength = 0;
178     // The base operands of *Last.
179     SmallVector<const MachineOperand *, 4> BaseOps;
180   };
181 
182   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
183     if (CI.First == CI.Last)
184       return false;
185     assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!");
186 
187     auto &MBB = *CI.First->getParent();
188     auto ClauseMI =
189         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
190             .addImm(CI.Length - 1);
191     finalizeBundle(MBB, ClauseMI->getIterator(),
192                    std::next(CI.Last->getIterator()));
193     return true;
194   }
195 
196   bool runOnMachineFunction(MachineFunction &MF) override {
197     if (skipFunction(MF.getFunction()))
198       return false;
199 
200     ST = &MF.getSubtarget<GCNSubtarget>();
201     if (!ST->hasHardClauses())
202       return false;
203 
204     const SIInstrInfo *SII = ST->getInstrInfo();
205     const TargetRegisterInfo *TRI = ST->getRegisterInfo();
206 
207     bool Changed = false;
208     for (auto &MBB : MF) {
209       ClauseInfo CI;
210       for (auto &MI : MBB) {
211         HardClauseType Type = getHardClauseType(MI);
212 
213         int64_t Dummy1;
214         bool Dummy2;
215         unsigned Dummy3;
216         SmallVector<const MachineOperand *, 4> BaseOps;
217         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
218           if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
219                                                   Dummy3, TRI)) {
220             // We failed to get the base operands, so we'll never clause this
221             // instruction with any other, so pretend it's illegal.
222             Type = HARDCLAUSE_ILLEGAL;
223           }
224         }
225 
226         if (CI.Length == MaxInstructionsInClause ||
227             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
228              Type != HARDCLAUSE_IGNORE &&
229              (Type != CI.Type ||
230               // Note that we lie to shouldClusterMemOps about the size of the
231               // cluster. When shouldClusterMemOps is called from the machine
232               // scheduler it limits the size of the cluster to avoid increasing
233               // register pressure too much, but this pass runs after register
234               // allocation so there is no need for that kind of limit.
235               // We also lie about the Offset and OffsetIsScalable parameters,
236               // as they aren't used in the SIInstrInfo implementation.
237               !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
238                                         2, 2)))) {
239           // Finish the current clause.
240           Changed |= emitClause(CI, SII);
241           CI = ClauseInfo();
242         }
243 
244         if (CI.Length) {
245           // Extend the current clause.
246           if (Type != HARDCLAUSE_IGNORE) {
247             if (Type == HARDCLAUSE_INTERNAL) {
248               ++CI.TrailingInternalLength;
249             } else {
250               ++CI.Length;
251               CI.Length += CI.TrailingInternalLength;
252               CI.TrailingInternalLength = 0;
253               CI.Last = &MI;
254               CI.BaseOps = std::move(BaseOps);
255             }
256           }
257         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
258           // Start a new clause.
259           CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
260         }
261       }
262 
263       // Finish the last clause in the basic block if any.
264       if (CI.Length)
265         Changed |= emitClause(CI, SII);
266     }
267 
268     return Changed;
269   }
270 };
271 
272 } // namespace
273 
// Storage for the pass identifier that the SIInsertHardClauses constructor
// hands to MachineFunctionPass.
char SIInsertHardClauses::ID = 0;

// Makes the identifier reachable from outside this file as
// llvm::SIInsertHardClausesID.
char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;

// Registers the pass under the DEBUG_TYPE name with a human-readable
// description. NOTE(review): the two trailing `false` arguments are presumably
// the "CFG only" and "is analysis" flags -- confirm against PassSupport.h.
INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
                false, false)
280