1*e8d8bef9SDimitry Andric //===- CallGraphSort.cpp --------------------------------------------------===//
2*e8d8bef9SDimitry Andric //
3*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*e8d8bef9SDimitry Andric //
7*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8*e8d8bef9SDimitry Andric ///
9*e8d8bef9SDimitry Andric /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
10*e8d8bef9SDimitry Andric /// about the algorithm.
11*e8d8bef9SDimitry Andric ///
12*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
13*e8d8bef9SDimitry Andric 
14*e8d8bef9SDimitry Andric #include "CallGraphSort.h"
15*e8d8bef9SDimitry Andric #include "InputFiles.h"
16*e8d8bef9SDimitry Andric #include "SymbolTable.h"
17*e8d8bef9SDimitry Andric #include "Symbols.h"
18*e8d8bef9SDimitry Andric #include "lld/Common/ErrorHandler.h"
19*e8d8bef9SDimitry Andric 
20*e8d8bef9SDimitry Andric #include <numeric>
21*e8d8bef9SDimitry Andric 
22*e8d8bef9SDimitry Andric using namespace llvm;
23*e8d8bef9SDimitry Andric using namespace lld;
24*e8d8bef9SDimitry Andric using namespace lld::coff;
25*e8d8bef9SDimitry Andric 
26*e8d8bef9SDimitry Andric namespace {
27*e8d8bef9SDimitry Andric struct Edge {
28*e8d8bef9SDimitry Andric   int from;
29*e8d8bef9SDimitry Andric   uint64_t weight;
30*e8d8bef9SDimitry Andric };
31*e8d8bef9SDimitry Andric 
32*e8d8bef9SDimitry Andric struct Cluster {
33*e8d8bef9SDimitry Andric   Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
34*e8d8bef9SDimitry Andric 
35*e8d8bef9SDimitry Andric   double getDensity() const {
36*e8d8bef9SDimitry Andric     if (size == 0)
37*e8d8bef9SDimitry Andric       return 0;
38*e8d8bef9SDimitry Andric     return double(weight) / double(size);
39*e8d8bef9SDimitry Andric   }
40*e8d8bef9SDimitry Andric 
41*e8d8bef9SDimitry Andric   int next;
42*e8d8bef9SDimitry Andric   int prev;
43*e8d8bef9SDimitry Andric   uint64_t size;
44*e8d8bef9SDimitry Andric   uint64_t weight = 0;
45*e8d8bef9SDimitry Andric   uint64_t initialWeight = 0;
46*e8d8bef9SDimitry Andric   Edge bestPred = {-1, 0};
47*e8d8bef9SDimitry Andric };
48*e8d8bef9SDimitry Andric 
49*e8d8bef9SDimitry Andric class CallGraphSort {
50*e8d8bef9SDimitry Andric public:
51*e8d8bef9SDimitry Andric   CallGraphSort();
52*e8d8bef9SDimitry Andric 
53*e8d8bef9SDimitry Andric   DenseMap<const SectionChunk *, int> run();
54*e8d8bef9SDimitry Andric 
55*e8d8bef9SDimitry Andric private:
56*e8d8bef9SDimitry Andric   std::vector<Cluster> clusters;
57*e8d8bef9SDimitry Andric   std::vector<const SectionChunk *> sections;
58*e8d8bef9SDimitry Andric };
59*e8d8bef9SDimitry Andric 
60*e8d8bef9SDimitry Andric // Maximum amount the combined cluster density can be worse than the original
61*e8d8bef9SDimitry Andric // cluster to consider merging.
62*e8d8bef9SDimitry Andric constexpr int MAX_DENSITY_DEGRADATION = 8;
63*e8d8bef9SDimitry Andric 
64*e8d8bef9SDimitry Andric // Maximum cluster size in bytes.
65*e8d8bef9SDimitry Andric constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
66*e8d8bef9SDimitry Andric } // end anonymous namespace
67*e8d8bef9SDimitry Andric 
68*e8d8bef9SDimitry Andric using SectionPair = std::pair<const SectionChunk *, const SectionChunk *>;
69*e8d8bef9SDimitry Andric 
70*e8d8bef9SDimitry Andric // Take the edge list in Config->CallGraphProfile, resolve symbol names to
71*e8d8bef9SDimitry Andric // Symbols, and generate a graph between InputSections with the provided
72*e8d8bef9SDimitry Andric // weights.
73*e8d8bef9SDimitry Andric CallGraphSort::CallGraphSort() {
74*e8d8bef9SDimitry Andric   MapVector<SectionPair, uint64_t> &profile = config->callGraphProfile;
75*e8d8bef9SDimitry Andric   DenseMap<const SectionChunk *, int> secToCluster;
76*e8d8bef9SDimitry Andric 
77*e8d8bef9SDimitry Andric   auto getOrCreateNode = [&](const SectionChunk *isec) -> int {
78*e8d8bef9SDimitry Andric     auto res = secToCluster.try_emplace(isec, clusters.size());
79*e8d8bef9SDimitry Andric     if (res.second) {
80*e8d8bef9SDimitry Andric       sections.push_back(isec);
81*e8d8bef9SDimitry Andric       clusters.emplace_back(clusters.size(), isec->getSize());
82*e8d8bef9SDimitry Andric     }
83*e8d8bef9SDimitry Andric     return res.first->second;
84*e8d8bef9SDimitry Andric   };
85*e8d8bef9SDimitry Andric 
86*e8d8bef9SDimitry Andric   // Create the graph.
87*e8d8bef9SDimitry Andric   for (std::pair<SectionPair, uint64_t> &c : profile) {
88*e8d8bef9SDimitry Andric     const auto *fromSec = cast<SectionChunk>(c.first.first->repl);
89*e8d8bef9SDimitry Andric     const auto *toSec = cast<SectionChunk>(c.first.second->repl);
90*e8d8bef9SDimitry Andric     uint64_t weight = c.second;
91*e8d8bef9SDimitry Andric 
92*e8d8bef9SDimitry Andric     // Ignore edges between input sections belonging to different output
93*e8d8bef9SDimitry Andric     // sections.  This is done because otherwise we would end up with clusters
94*e8d8bef9SDimitry Andric     // containing input sections that can't actually be placed adjacently in the
95*e8d8bef9SDimitry Andric     // output.  This messes with the cluster size and density calculations.  We
96*e8d8bef9SDimitry Andric     // would also end up moving input sections in other output sections without
97*e8d8bef9SDimitry Andric     // moving them closer to what calls them.
98*e8d8bef9SDimitry Andric     if (fromSec->getOutputSection() != toSec->getOutputSection())
99*e8d8bef9SDimitry Andric       continue;
100*e8d8bef9SDimitry Andric 
101*e8d8bef9SDimitry Andric     int from = getOrCreateNode(fromSec);
102*e8d8bef9SDimitry Andric     int to = getOrCreateNode(toSec);
103*e8d8bef9SDimitry Andric 
104*e8d8bef9SDimitry Andric     clusters[to].weight += weight;
105*e8d8bef9SDimitry Andric 
106*e8d8bef9SDimitry Andric     if (from == to)
107*e8d8bef9SDimitry Andric       continue;
108*e8d8bef9SDimitry Andric 
109*e8d8bef9SDimitry Andric     // Remember the best edge.
110*e8d8bef9SDimitry Andric     Cluster &toC = clusters[to];
111*e8d8bef9SDimitry Andric     if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {
112*e8d8bef9SDimitry Andric       toC.bestPred.from = from;
113*e8d8bef9SDimitry Andric       toC.bestPred.weight = weight;
114*e8d8bef9SDimitry Andric     }
115*e8d8bef9SDimitry Andric   }
116*e8d8bef9SDimitry Andric   for (Cluster &c : clusters)
117*e8d8bef9SDimitry Andric     c.initialWeight = c.weight;
118*e8d8bef9SDimitry Andric }
119*e8d8bef9SDimitry Andric 
120*e8d8bef9SDimitry Andric // It's bad to merge clusters which would degrade the density too much.
121*e8d8bef9SDimitry Andric static bool isNewDensityBad(Cluster &a, Cluster &b) {
122*e8d8bef9SDimitry Andric   double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
123*e8d8bef9SDimitry Andric   return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
124*e8d8bef9SDimitry Andric }
125*e8d8bef9SDimitry Andric 
126*e8d8bef9SDimitry Andric // Find the leader of V's belonged cluster (represented as an equivalence
127*e8d8bef9SDimitry Andric // class). We apply union-find path-halving technique (simple to implement) in
128*e8d8bef9SDimitry Andric // the meantime as it decreases depths and the time complexity.
129*e8d8bef9SDimitry Andric static int getLeader(std::vector<int> &leaders, int v) {
130*e8d8bef9SDimitry Andric   while (leaders[v] != v) {
131*e8d8bef9SDimitry Andric     leaders[v] = leaders[leaders[v]];
132*e8d8bef9SDimitry Andric     v = leaders[v];
133*e8d8bef9SDimitry Andric   }
134*e8d8bef9SDimitry Andric   return v;
135*e8d8bef9SDimitry Andric }
136*e8d8bef9SDimitry Andric 
137*e8d8bef9SDimitry Andric static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
138*e8d8bef9SDimitry Andric                           Cluster &from, int fromIdx) {
139*e8d8bef9SDimitry Andric   int tail1 = into.prev, tail2 = from.prev;
140*e8d8bef9SDimitry Andric   into.prev = tail2;
141*e8d8bef9SDimitry Andric   cs[tail2].next = intoIdx;
142*e8d8bef9SDimitry Andric   from.prev = tail1;
143*e8d8bef9SDimitry Andric   cs[tail1].next = fromIdx;
144*e8d8bef9SDimitry Andric   into.size += from.size;
145*e8d8bef9SDimitry Andric   into.weight += from.weight;
146*e8d8bef9SDimitry Andric   from.size = 0;
147*e8d8bef9SDimitry Andric   from.weight = 0;
148*e8d8bef9SDimitry Andric }
149*e8d8bef9SDimitry Andric 
150*e8d8bef9SDimitry Andric // Group InputSections into clusters using the Call-Chain Clustering heuristic
151*e8d8bef9SDimitry Andric // then sort the clusters by density.
152*e8d8bef9SDimitry Andric DenseMap<const SectionChunk *, int> CallGraphSort::run() {
153*e8d8bef9SDimitry Andric   std::vector<int> sorted(clusters.size());
154*e8d8bef9SDimitry Andric   std::vector<int> leaders(clusters.size());
155*e8d8bef9SDimitry Andric 
156*e8d8bef9SDimitry Andric   std::iota(leaders.begin(), leaders.end(), 0);
157*e8d8bef9SDimitry Andric   std::iota(sorted.begin(), sorted.end(), 0);
158*e8d8bef9SDimitry Andric   llvm::stable_sort(sorted, [&](int a, int b) {
159*e8d8bef9SDimitry Andric     return clusters[a].getDensity() > clusters[b].getDensity();
160*e8d8bef9SDimitry Andric   });
161*e8d8bef9SDimitry Andric 
162*e8d8bef9SDimitry Andric   for (int l : sorted) {
163*e8d8bef9SDimitry Andric     // The cluster index is the same as the index of its leader here because
164*e8d8bef9SDimitry Andric     // clusters[L] has not been merged into another cluster yet.
165*e8d8bef9SDimitry Andric     Cluster &c = clusters[l];
166*e8d8bef9SDimitry Andric 
167*e8d8bef9SDimitry Andric     // Don't consider merging if the edge is unlikely.
168*e8d8bef9SDimitry Andric     if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)
169*e8d8bef9SDimitry Andric       continue;
170*e8d8bef9SDimitry Andric 
171*e8d8bef9SDimitry Andric     int predL = getLeader(leaders, c.bestPred.from);
172*e8d8bef9SDimitry Andric     if (l == predL)
173*e8d8bef9SDimitry Andric       continue;
174*e8d8bef9SDimitry Andric 
175*e8d8bef9SDimitry Andric     Cluster *predC = &clusters[predL];
176*e8d8bef9SDimitry Andric     if (c.size + predC->size > MAX_CLUSTER_SIZE)
177*e8d8bef9SDimitry Andric       continue;
178*e8d8bef9SDimitry Andric 
179*e8d8bef9SDimitry Andric     if (isNewDensityBad(*predC, c))
180*e8d8bef9SDimitry Andric       continue;
181*e8d8bef9SDimitry Andric 
182*e8d8bef9SDimitry Andric     leaders[l] = predL;
183*e8d8bef9SDimitry Andric     mergeClusters(clusters, *predC, predL, c, l);
184*e8d8bef9SDimitry Andric   }
185*e8d8bef9SDimitry Andric 
186*e8d8bef9SDimitry Andric   // Sort remaining non-empty clusters by density.
187*e8d8bef9SDimitry Andric   sorted.clear();
188*e8d8bef9SDimitry Andric   for (int i = 0, e = (int)clusters.size(); i != e; ++i)
189*e8d8bef9SDimitry Andric     if (clusters[i].size > 0)
190*e8d8bef9SDimitry Andric       sorted.push_back(i);
191*e8d8bef9SDimitry Andric   llvm::stable_sort(sorted, [&](int a, int b) {
192*e8d8bef9SDimitry Andric     return clusters[a].getDensity() > clusters[b].getDensity();
193*e8d8bef9SDimitry Andric   });
194*e8d8bef9SDimitry Andric 
195*e8d8bef9SDimitry Andric   DenseMap<const SectionChunk *, int> orderMap;
196*e8d8bef9SDimitry Andric   // Sections will be sorted by increasing order. Absent sections will have
197*e8d8bef9SDimitry Andric   // priority 0 and be placed at the end of sections.
198*e8d8bef9SDimitry Andric   int curOrder = INT_MIN;
199*e8d8bef9SDimitry Andric   for (int leader : sorted) {
200*e8d8bef9SDimitry Andric     for (int i = leader;;) {
201*e8d8bef9SDimitry Andric       orderMap[sections[i]] = curOrder++;
202*e8d8bef9SDimitry Andric       i = clusters[i].next;
203*e8d8bef9SDimitry Andric       if (i == leader)
204*e8d8bef9SDimitry Andric         break;
205*e8d8bef9SDimitry Andric     }
206*e8d8bef9SDimitry Andric   }
207*e8d8bef9SDimitry Andric   if (!config->printSymbolOrder.empty()) {
208*e8d8bef9SDimitry Andric     std::error_code ec;
209*e8d8bef9SDimitry Andric     raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None);
210*e8d8bef9SDimitry Andric     if (ec) {
211*e8d8bef9SDimitry Andric       error("cannot open " + config->printSymbolOrder + ": " + ec.message());
212*e8d8bef9SDimitry Andric       return orderMap;
213*e8d8bef9SDimitry Andric     }
214*e8d8bef9SDimitry Andric     // Print the symbols ordered by C3, in the order of increasing curOrder
215*e8d8bef9SDimitry Andric     // Instead of sorting all the orderMap, just repeat the loops above.
216*e8d8bef9SDimitry Andric     for (int leader : sorted)
217*e8d8bef9SDimitry Andric       for (int i = leader;;) {
218*e8d8bef9SDimitry Andric         const SectionChunk *sc = sections[i];
219*e8d8bef9SDimitry Andric 
220*e8d8bef9SDimitry Andric         // Search all the symbols in the file of the section
221*e8d8bef9SDimitry Andric         // and find out a DefinedCOFF symbol with name that is within the
222*e8d8bef9SDimitry Andric         // section.
223*e8d8bef9SDimitry Andric         for (Symbol *sym : sc->file->getSymbols())
224*e8d8bef9SDimitry Andric           if (auto *d = dyn_cast_or_null<DefinedCOFF>(sym))
225*e8d8bef9SDimitry Andric             // Filter out non-COMDAT symbols and section symbols.
226*e8d8bef9SDimitry Andric             if (d->isCOMDAT && !d->getCOFFSymbol().isSection() &&
227*e8d8bef9SDimitry Andric                 sc == d->getChunk())
228*e8d8bef9SDimitry Andric               os << sym->getName() << "\n";
229*e8d8bef9SDimitry Andric         i = clusters[i].next;
230*e8d8bef9SDimitry Andric         if (i == leader)
231*e8d8bef9SDimitry Andric           break;
232*e8d8bef9SDimitry Andric       }
233*e8d8bef9SDimitry Andric   }
234*e8d8bef9SDimitry Andric 
235*e8d8bef9SDimitry Andric   return orderMap;
236*e8d8bef9SDimitry Andric }
237*e8d8bef9SDimitry Andric 
238*e8d8bef9SDimitry Andric // Sort sections by the profile data provided by  /call-graph-ordering-file
239*e8d8bef9SDimitry Andric //
240*e8d8bef9SDimitry Andric // This first builds a call graph based on the profile data then merges sections
241*e8d8bef9SDimitry Andric // according to the C³ heuristic. All clusters are then sorted by a density
242*e8d8bef9SDimitry Andric // metric to further improve locality.
243*e8d8bef9SDimitry Andric DenseMap<const SectionChunk *, int> coff::computeCallGraphProfileOrder() {
244*e8d8bef9SDimitry Andric   return CallGraphSort().run();
245*e8d8bef9SDimitry Andric }
246