1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Insert s_clause instructions to form hard clauses.
11 ///
12 /// Clausing load instructions can give cache coherency benefits. Before gfx10,
13 /// the hardware automatically detected "soft clauses", which were sequences of
14 /// memory instructions of the same type. In gfx10 this detection was removed,
15 /// and the s_clause instruction was introduced to explicitly mark "hard
16 /// clauses".
17 ///
18 /// It's the scheduler's job to form the clauses by putting similar memory
19 /// instructions next to each other. Our job is just to insert an s_clause
20 /// instruction to mark the start of each clause.
21 ///
22 /// Note that hard clauses are very similar to, but logically distinct from, the
23 /// groups of instructions that have to be restartable when XNACK is enabled.
24 /// The rules are slightly different in each case. For example an s_nop
25 /// instruction breaks a restartable group, but can appear in the middle of a
26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27 /// "soft clauses" or just "clauses".)
28 ///
29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30 /// groups, not hard clauses.
31 //
32 //===----------------------------------------------------------------------===//
33 
34 #include "AMDGPU.h"
35 #include "GCNSubtarget.h"
36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37 #include "llvm/ADT/SmallVector.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "si-insert-hard-clauses"
42 
43 namespace {
44 
/// How a machine instruction relates to hard clauses.
///
/// Every enumerator up to and including LAST_REAL_HARDCLAUSE_TYPE is a "real"
/// clause type; the enumerators after it describe instructions that are not
/// themselves clause members.
enum HardClauseType {
  /// Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM = 0,
  /// Flat memory instructions that are neither global nor scratch.
  HARDCLAUSE_FLAT = 1,
  /// Instructions that access LDS.
  HARDCLAUSE_LDS = 2,
  /// Scalar memory instructions.
  HARDCLAUSE_SMEM = 3,
  /// VALU instructions.
  HARDCLAUSE_VALU = 4,

  /// Upper bound (inclusive) of the real clause types listed above.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  /// Internal instructions. Apart from s_waitcnt, these may sit in the middle
  /// of a hard clause.
  HARDCLAUSE_INTERNAL = 5,
  /// Everything that must not appear in a hard clause: SALU, export, branch,
  /// message, GDS, s_waitcnt and any instruction not covered above.
  HARDCLAUSE_ILLEGAL = 6,
};
65 
66 class SIInsertHardClauses : public MachineFunctionPass {
67 public:
68   static char ID;
69   const GCNSubtarget *ST = nullptr;
70 
SIInsertHardClauses()71   SIInsertHardClauses() : MachineFunctionPass(ID) {}
72 
getAnalysisUsage(AnalysisUsage & AU) const73   void getAnalysisUsage(AnalysisUsage &AU) const override {
74     AU.setPreservesCFG();
75     MachineFunctionPass::getAnalysisUsage(AU);
76   }
77 
getHardClauseType(const MachineInstr & MI)78   HardClauseType getHardClauseType(const MachineInstr &MI) {
79 
80     // On current architectures we only get a benefit from clausing loads.
81     if (MI.mayLoad()) {
82       if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
83         if (ST->hasNSAClauseBug()) {
84           const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
85           if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
86             return HARDCLAUSE_ILLEGAL;
87         }
88         return HARDCLAUSE_VMEM;
89       }
90       if (SIInstrInfo::isFLAT(MI))
91         return HARDCLAUSE_FLAT;
92       // TODO: LDS
93       if (SIInstrInfo::isSMRD(MI))
94         return HARDCLAUSE_SMEM;
95     }
96 
97     // Don't form VALU clauses. It's not clear what benefit they give, if any.
98 
99     // In practice s_nop is the only internal instruction we're likely to see.
100     // It's safe to treat the rest as illegal.
101     if (MI.getOpcode() == AMDGPU::S_NOP)
102       return HARDCLAUSE_INTERNAL;
103     return HARDCLAUSE_ILLEGAL;
104   }
105 
106   // Track information about a clause as we discover it.
107   struct ClauseInfo {
108     // The type of all (non-internal) instructions in the clause.
109     HardClauseType Type = HARDCLAUSE_ILLEGAL;
110     // The first (necessarily non-internal) instruction in the clause.
111     MachineInstr *First = nullptr;
112     // The last non-internal instruction in the clause.
113     MachineInstr *Last = nullptr;
114     // The length of the clause including any internal instructions in the
115     // middle or after the end of the clause.
116     unsigned Length = 0;
117     // The base operands of *Last.
118     SmallVector<const MachineOperand *, 4> BaseOps;
119   };
120 
emitClause(const ClauseInfo & CI,const SIInstrInfo * SII)121   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
122     // Get the size of the clause excluding any internal instructions at the
123     // end.
124     unsigned Size =
125         std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
126     if (Size < 2)
127       return false;
128     assert(Size <= 64 && "Hard clause is too long!");
129 
130     auto &MBB = *CI.First->getParent();
131     auto ClauseMI =
132         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
133             .addImm(Size - 1);
134     finalizeBundle(MBB, ClauseMI->getIterator(),
135                    std::next(CI.Last->getIterator()));
136     return true;
137   }
138 
runOnMachineFunction(MachineFunction & MF)139   bool runOnMachineFunction(MachineFunction &MF) override {
140     if (skipFunction(MF.getFunction()))
141       return false;
142 
143     ST = &MF.getSubtarget<GCNSubtarget>();
144     if (!ST->hasHardClauses())
145       return false;
146 
147     const SIInstrInfo *SII = ST->getInstrInfo();
148     const TargetRegisterInfo *TRI = ST->getRegisterInfo();
149 
150     bool Changed = false;
151     for (auto &MBB : MF) {
152       ClauseInfo CI;
153       for (auto &MI : MBB) {
154         HardClauseType Type = getHardClauseType(MI);
155 
156         int64_t Dummy1;
157         bool Dummy2;
158         unsigned Dummy3;
159         SmallVector<const MachineOperand *, 4> BaseOps;
160         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
161           if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
162                                                   Dummy3, TRI)) {
163             // We failed to get the base operands, so we'll never clause this
164             // instruction with any other, so pretend it's illegal.
165             Type = HARDCLAUSE_ILLEGAL;
166           }
167         }
168 
169         if (CI.Length == 64 ||
170             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
171              (Type != CI.Type ||
172               // Note that we lie to shouldClusterMemOps about the size of the
173               // cluster. When shouldClusterMemOps is called from the machine
174               // scheduler it limits the size of the cluster to avoid increasing
175               // register pressure too much, but this pass runs after register
176               // allocation so there is no need for that kind of limit.
177               !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
178           // Finish the current clause.
179           Changed |= emitClause(CI, SII);
180           CI = ClauseInfo();
181         }
182 
183         if (CI.Length) {
184           // Extend the current clause.
185           ++CI.Length;
186           if (Type != HARDCLAUSE_INTERNAL) {
187             CI.Last = &MI;
188             CI.BaseOps = std::move(BaseOps);
189           }
190         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
191           // Start a new clause.
192           CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
193         }
194       }
195 
196       // Finish the last clause in the basic block if any.
197       if (CI.Length)
198         Changed |= emitClause(CI, SII);
199     }
200 
201     return Changed;
202   }
203 };
204 
205 } // namespace
206 
// Out-of-class definition of the pass's static ID member, which the
// constructor hands to MachineFunctionPass.
char SIInsertHardClauses::ID = 0;

// Expose the pass ID under the llvm namespace so that code outside this file
// can refer to the pass.
char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;

// Register the pass under the DEBUG_TYPE name ("si-insert-hard-clauses") with
// a human-readable description.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG-only" and "is analysis" flags of INITIALIZE_PASS -- confirm against
// the macro definition.
INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
                false, false)
213