//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a DAG scheduling mutation to cluster shader
///       exports.
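///
///       The mutation is created via createAMDGPUExportClusteringDAGMutation()
///       and is intended to be added to the machine scheduler's list of DAG
///       mutations.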
//
//===----------------------------------------------------------------------===//

#include "AMDGPUExportClustering.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"

using namespace llvm;

namespace {

class ExportClustering : public ScheduleDAGMutation {
public:
  ExportClustering() = default;
  void apply(ScheduleDAGInstrs *DAG) override;
};

static bool isExport(const SUnit &SU) {
  return SIInstrInfo::isEXP(*SU.getInstr());
}

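// Check whether an export writes to a position target (ET_POS0 through
// ET_POS_LAST), based on the immediate in its 'tgt' operand.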
static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
  return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
}

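// Reorder Chain so that position exports come before all other exports,
// preserving the relative order within each group.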
static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
                      unsigned PosCount) {
  if (!PosCount || PosCount == Chain.size())
    return;

  // Position exports should occur as soon as possible in the shader
  // for optimal performance.  This moves position exports before the
  // other exports while preserving the relative order within each
  // export type (position or other).
  SmallVector<SUnit *, 8> Copy(Chain);
  unsigned PosIdx = 0;
  unsigned OtherIdx = PosCount;
  for (SUnit *SU : Copy) {
    if (isPositionExport(TII, SU))
      Chain[PosIdx++] = SU;
    else
      Chain[OtherIdx++] = SU;
  }
}

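// Turn the ordered chain of exports into a cluster: copy each export's
// non-export dependencies onto the chain head so no other computation can be
// scheduled between the exports, then link successive exports with barrier
// and cluster edges.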
static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
  SUnit *ChainHead = Exports.front();

  // Now construct the cluster from the chain by adding new edges.
  for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
    SUnit *SUa = Exports[Idx];
    SUnit *SUb = Exports[Idx + 1];

    // Copy all dependencies to the head of the chain to avoid any
    // computation being inserted into the chain.
    for (const SDep &Pred : SUb->Preds) {
      SUnit *PredSU = Pred.getSUnit();
      if (!isExport(*PredSU) && !Pred.isWeak())
        DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
    }

    // Add a barrier edge to enforce the ordering of the exports.
    DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
    // Also add a cluster edge to keep the exports adjacent.
    DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
  }
}

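// Remove barrier edges that make SU depend on an export. When SU is not
// itself an export, copy the export's own barrier predecessors onto SU so
// that ordering against non-export instructions is preserved.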
static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
  SmallVector<SDep, 2> ToAdd, ToRemove;

  for (const SDep &Pred : SU.Preds) {
    SUnit *PredSU = Pred.getSUnit();
    if (Pred.isBarrier() && isExport(*PredSU)) {
      ToRemove.push_back(Pred);
      if (isExport(SU))
        continue;

      // If we remove a barrier we need to copy dependencies
      // from the predecessor to maintain order.
      for (const SDep &ExportPred : PredSU->Preds) {
        SUnit *ExportPredSU = ExportPred.getSUnit();
        if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
          ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
      }
    }
  }

  for (SDep Pred : ToRemove)
    SU.removePred(Pred);
  for (SDep Pred : ToAdd)
    DAG->addEdge(&SU, Pred);
}

void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);

  SmallVector<SUnit *, 8> Chain;

  // Pass through the DAG gathering a list of exports and removing barrier
  // edges that create dependencies on exports. Freeing exports of successor
  // edges allows more scheduling freedom, and nothing should be order
  // dependent on exports. Edges will be added later to order the exports.
  unsigned PosCount = 0;
  for (SUnit &SU : DAG->SUnits) {
    if (!isExport(SU))
      continue;

    Chain.push_back(&SU);
    if (isPositionExport(TII, &SU))
      PosCount++;

    removeExportDependencies(DAG, SU);

    SmallVector<SDep, 4> Succs(SU.Succs);
    for (SDep Succ : Succs)
      removeExportDependencies(DAG, *Succ.getSUnit());
  }

  // Apply clustering if there are multiple exports
  if (Chain.size() > 1) {
    sortChain(TII, Chain, PosCount);
    buildCluster(Chain, DAG);
  }
}

} // end anonymous namespace

namespace llvm {

std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
  return std::make_unique<ExportClustering>();
}

} // end namespace llvm