//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file contains a DAG scheduling mutation to cluster shader
10 /// exports.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUExportClustering.h"
15 #include "AMDGPUSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIInstrInfo.h"
18
19 using namespace llvm;
20
21 namespace {
22
23 class ExportClustering : public ScheduleDAGMutation {
24 public:
ExportClustering()25 ExportClustering() {}
26 void apply(ScheduleDAGInstrs *DAG) override;
27 };
28
isExport(const SUnit & SU)29 static bool isExport(const SUnit &SU) {
30 const MachineInstr *MI = SU.getInstr();
31 return MI->getOpcode() == AMDGPU::EXP ||
32 MI->getOpcode() == AMDGPU::EXP_DONE;
33 }
34
isPositionExport(const SIInstrInfo * TII,SUnit * SU)35 static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
36 const MachineInstr *MI = SU->getInstr();
37 int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
38 return Imm >= 12 && Imm <= 15;
39 }
40
sortChain(const SIInstrInfo * TII,SmallVector<SUnit *,8> & Chain,unsigned PosCount)41 static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
42 unsigned PosCount) {
43 if (!PosCount || PosCount == Chain.size())
44 return;
45
46 // Position exports should occur as soon as possible in the shader
47 // for optimal performance. This moves position exports before
48 // other exports while preserving the order within different export
49 // types (pos or other).
50 SmallVector<SUnit *, 8> Copy(Chain);
51 unsigned PosIdx = 0;
52 unsigned OtherIdx = PosCount;
53 for (SUnit *SU : Copy) {
54 if (isPositionExport(TII, SU))
55 Chain[PosIdx++] = SU;
56 else
57 Chain[OtherIdx++] = SU;
58 }
59 }
60
buildCluster(ArrayRef<SUnit * > Exports,ScheduleDAGInstrs * DAG)61 static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
62 SUnit *ChainHead = Exports.front();
63
64 // Now construct cluster from chain by adding new edges.
65 for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
66 SUnit *SUa = Exports[Idx];
67 SUnit *SUb = Exports[Idx + 1];
68
69 // Copy all dependencies to the head of the chain to avoid any
70 // computation being inserted into the chain.
71 for (const SDep &Pred : SUb->Preds) {
72 SUnit *PredSU = Pred.getSUnit();
73 if (!isExport(*PredSU) && !Pred.isWeak())
74 DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
75 }
76
77 // New barrier edge ordering exports
78 DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
79 // Also add cluster edge
80 DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
81 }
82 }
83
removeExportDependencies(ScheduleDAGInstrs * DAG,SUnit & SU)84 static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
85 SmallVector<SDep, 2> ToAdd, ToRemove;
86
87 for (const SDep &Pred : SU.Preds) {
88 SUnit *PredSU = Pred.getSUnit();
89 if (Pred.isBarrier() && isExport(*PredSU)) {
90 ToRemove.push_back(Pred);
91 if (isExport(SU))
92 continue;
93
94 // If we remove a barrier we need to copy dependencies
95 // from the predecessor to maintain order.
96 for (const SDep &ExportPred : PredSU->Preds) {
97 SUnit *ExportPredSU = ExportPred.getSUnit();
98 if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
99 ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
100 }
101 }
102 }
103
104 for (SDep Pred : ToRemove)
105 SU.removePred(Pred);
106 for (SDep Pred : ToAdd)
107 DAG->addEdge(&SU, Pred);
108 }
109
apply(ScheduleDAGInstrs * DAG)110 void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
111 const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
112
113 SmallVector<SUnit *, 8> Chain;
114
115 // Pass through DAG gathering a list of exports and removing barrier edges
116 // creating dependencies on exports. Freeing exports of successor edges
117 // allows more scheduling freedom, and nothing should be order dependent
118 // on exports. Edges will be added later to order the exports.
119 unsigned PosCount = 0;
120 for (SUnit &SU : DAG->SUnits) {
121 if (!isExport(SU))
122 continue;
123
124 Chain.push_back(&SU);
125 if (isPositionExport(TII, &SU))
126 PosCount++;
127
128 removeExportDependencies(DAG, SU);
129
130 SmallVector<SDep, 4> Succs(SU.Succs);
131 for (SDep Succ : Succs)
132 removeExportDependencies(DAG, *Succ.getSUnit());
133 }
134
135 // Apply clustering if there are multiple exports
136 if (Chain.size() > 1) {
137 sortChain(TII, Chain, PosCount);
138 buildCluster(Chain, DAG);
139 }
140 }
141
142 } // end namespace
143
144 namespace llvm {
145
createAMDGPUExportClusteringDAGMutation()146 std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
147 return std::make_unique<ExportClustering>();
148 }
149
150 } // end namespace llvm
151