11cf9926bSpatrick //===- CallGraphSort.cpp --------------------------------------------------===//
21cf9926bSpatrick //
31cf9926bSpatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
41cf9926bSpatrick // See https://llvm.org/LICENSE.txt for license information.
51cf9926bSpatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
61cf9926bSpatrick //
71cf9926bSpatrick //===----------------------------------------------------------------------===//
81cf9926bSpatrick ///
91cf9926bSpatrick /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
101cf9926bSpatrick /// about the algorithm.
111cf9926bSpatrick ///
121cf9926bSpatrick //===----------------------------------------------------------------------===//
131cf9926bSpatrick
141cf9926bSpatrick #include "CallGraphSort.h"
15*dfe94b16Srobert #include "COFFLinkerContext.h"
161cf9926bSpatrick #include "InputFiles.h"
171cf9926bSpatrick #include "SymbolTable.h"
181cf9926bSpatrick #include "Symbols.h"
191cf9926bSpatrick #include "lld/Common/ErrorHandler.h"
201cf9926bSpatrick
211cf9926bSpatrick #include <numeric>
221cf9926bSpatrick
231cf9926bSpatrick using namespace llvm;
241cf9926bSpatrick using namespace lld;
251cf9926bSpatrick using namespace lld::coff;
261cf9926bSpatrick
271cf9926bSpatrick namespace {
281cf9926bSpatrick struct Edge {
291cf9926bSpatrick int from;
301cf9926bSpatrick uint64_t weight;
311cf9926bSpatrick };
321cf9926bSpatrick
331cf9926bSpatrick struct Cluster {
Cluster__anon8467269b0111::Cluster341cf9926bSpatrick Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
351cf9926bSpatrick
getDensity__anon8467269b0111::Cluster361cf9926bSpatrick double getDensity() const {
371cf9926bSpatrick if (size == 0)
381cf9926bSpatrick return 0;
391cf9926bSpatrick return double(weight) / double(size);
401cf9926bSpatrick }
411cf9926bSpatrick
421cf9926bSpatrick int next;
431cf9926bSpatrick int prev;
441cf9926bSpatrick uint64_t size;
451cf9926bSpatrick uint64_t weight = 0;
461cf9926bSpatrick uint64_t initialWeight = 0;
471cf9926bSpatrick Edge bestPred = {-1, 0};
481cf9926bSpatrick };
491cf9926bSpatrick
501cf9926bSpatrick class CallGraphSort {
511cf9926bSpatrick public:
52*dfe94b16Srobert CallGraphSort(const COFFLinkerContext &ctx);
531cf9926bSpatrick
541cf9926bSpatrick DenseMap<const SectionChunk *, int> run();
551cf9926bSpatrick
561cf9926bSpatrick private:
571cf9926bSpatrick std::vector<Cluster> clusters;
581cf9926bSpatrick std::vector<const SectionChunk *> sections;
59*dfe94b16Srobert
60*dfe94b16Srobert const COFFLinkerContext &ctx;
611cf9926bSpatrick };
621cf9926bSpatrick
631cf9926bSpatrick // Maximum amount the combined cluster density can be worse than the original
641cf9926bSpatrick // cluster to consider merging.
651cf9926bSpatrick constexpr int MAX_DENSITY_DEGRADATION = 8;
661cf9926bSpatrick
671cf9926bSpatrick // Maximum cluster size in bytes.
681cf9926bSpatrick constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
691cf9926bSpatrick } // end anonymous namespace
701cf9926bSpatrick
711cf9926bSpatrick using SectionPair = std::pair<const SectionChunk *, const SectionChunk *>;
721cf9926bSpatrick
731cf9926bSpatrick // Take the edge list in Config->CallGraphProfile, resolve symbol names to
741cf9926bSpatrick // Symbols, and generate a graph between InputSections with the provided
751cf9926bSpatrick // weights.
CallGraphSort(const COFFLinkerContext & ctx)76*dfe94b16Srobert CallGraphSort::CallGraphSort(const COFFLinkerContext &ctx) : ctx(ctx) {
77*dfe94b16Srobert const MapVector<SectionPair, uint64_t> &profile = ctx.config.callGraphProfile;
781cf9926bSpatrick DenseMap<const SectionChunk *, int> secToCluster;
791cf9926bSpatrick
801cf9926bSpatrick auto getOrCreateNode = [&](const SectionChunk *isec) -> int {
811cf9926bSpatrick auto res = secToCluster.try_emplace(isec, clusters.size());
821cf9926bSpatrick if (res.second) {
831cf9926bSpatrick sections.push_back(isec);
841cf9926bSpatrick clusters.emplace_back(clusters.size(), isec->getSize());
851cf9926bSpatrick }
861cf9926bSpatrick return res.first->second;
871cf9926bSpatrick };
881cf9926bSpatrick
891cf9926bSpatrick // Create the graph.
90*dfe94b16Srobert for (const std::pair<SectionPair, uint64_t> &c : profile) {
911cf9926bSpatrick const auto *fromSec = cast<SectionChunk>(c.first.first->repl);
921cf9926bSpatrick const auto *toSec = cast<SectionChunk>(c.first.second->repl);
931cf9926bSpatrick uint64_t weight = c.second;
941cf9926bSpatrick
951cf9926bSpatrick // Ignore edges between input sections belonging to different output
961cf9926bSpatrick // sections. This is done because otherwise we would end up with clusters
971cf9926bSpatrick // containing input sections that can't actually be placed adjacently in the
981cf9926bSpatrick // output. This messes with the cluster size and density calculations. We
991cf9926bSpatrick // would also end up moving input sections in other output sections without
1001cf9926bSpatrick // moving them closer to what calls them.
101*dfe94b16Srobert if (ctx.getOutputSection(fromSec) != ctx.getOutputSection(toSec))
1021cf9926bSpatrick continue;
1031cf9926bSpatrick
1041cf9926bSpatrick int from = getOrCreateNode(fromSec);
1051cf9926bSpatrick int to = getOrCreateNode(toSec);
1061cf9926bSpatrick
1071cf9926bSpatrick clusters[to].weight += weight;
1081cf9926bSpatrick
1091cf9926bSpatrick if (from == to)
1101cf9926bSpatrick continue;
1111cf9926bSpatrick
1121cf9926bSpatrick // Remember the best edge.
1131cf9926bSpatrick Cluster &toC = clusters[to];
1141cf9926bSpatrick if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {
1151cf9926bSpatrick toC.bestPred.from = from;
1161cf9926bSpatrick toC.bestPred.weight = weight;
1171cf9926bSpatrick }
1181cf9926bSpatrick }
1191cf9926bSpatrick for (Cluster &c : clusters)
1201cf9926bSpatrick c.initialWeight = c.weight;
1211cf9926bSpatrick }
1221cf9926bSpatrick
1231cf9926bSpatrick // It's bad to merge clusters which would degrade the density too much.
isNewDensityBad(Cluster & a,Cluster & b)1241cf9926bSpatrick static bool isNewDensityBad(Cluster &a, Cluster &b) {
1251cf9926bSpatrick double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
1261cf9926bSpatrick return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
1271cf9926bSpatrick }
1281cf9926bSpatrick
1291cf9926bSpatrick // Find the leader of V's belonged cluster (represented as an equivalence
1301cf9926bSpatrick // class). We apply union-find path-halving technique (simple to implement) in
1311cf9926bSpatrick // the meantime as it decreases depths and the time complexity.
getLeader(std::vector<int> & leaders,int v)1321cf9926bSpatrick static int getLeader(std::vector<int> &leaders, int v) {
1331cf9926bSpatrick while (leaders[v] != v) {
1341cf9926bSpatrick leaders[v] = leaders[leaders[v]];
1351cf9926bSpatrick v = leaders[v];
1361cf9926bSpatrick }
1371cf9926bSpatrick return v;
1381cf9926bSpatrick }
1391cf9926bSpatrick
mergeClusters(std::vector<Cluster> & cs,Cluster & into,int intoIdx,Cluster & from,int fromIdx)1401cf9926bSpatrick static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
1411cf9926bSpatrick Cluster &from, int fromIdx) {
1421cf9926bSpatrick int tail1 = into.prev, tail2 = from.prev;
1431cf9926bSpatrick into.prev = tail2;
1441cf9926bSpatrick cs[tail2].next = intoIdx;
1451cf9926bSpatrick from.prev = tail1;
1461cf9926bSpatrick cs[tail1].next = fromIdx;
1471cf9926bSpatrick into.size += from.size;
1481cf9926bSpatrick into.weight += from.weight;
1491cf9926bSpatrick from.size = 0;
1501cf9926bSpatrick from.weight = 0;
1511cf9926bSpatrick }
1521cf9926bSpatrick
1531cf9926bSpatrick // Group InputSections into clusters using the Call-Chain Clustering heuristic
1541cf9926bSpatrick // then sort the clusters by density.
run()1551cf9926bSpatrick DenseMap<const SectionChunk *, int> CallGraphSort::run() {
1561cf9926bSpatrick std::vector<int> sorted(clusters.size());
1571cf9926bSpatrick std::vector<int> leaders(clusters.size());
1581cf9926bSpatrick
1591cf9926bSpatrick std::iota(leaders.begin(), leaders.end(), 0);
1601cf9926bSpatrick std::iota(sorted.begin(), sorted.end(), 0);
1611cf9926bSpatrick llvm::stable_sort(sorted, [&](int a, int b) {
1621cf9926bSpatrick return clusters[a].getDensity() > clusters[b].getDensity();
1631cf9926bSpatrick });
1641cf9926bSpatrick
1651cf9926bSpatrick for (int l : sorted) {
1661cf9926bSpatrick // The cluster index is the same as the index of its leader here because
1671cf9926bSpatrick // clusters[L] has not been merged into another cluster yet.
1681cf9926bSpatrick Cluster &c = clusters[l];
1691cf9926bSpatrick
1701cf9926bSpatrick // Don't consider merging if the edge is unlikely.
1711cf9926bSpatrick if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)
1721cf9926bSpatrick continue;
1731cf9926bSpatrick
1741cf9926bSpatrick int predL = getLeader(leaders, c.bestPred.from);
1751cf9926bSpatrick if (l == predL)
1761cf9926bSpatrick continue;
1771cf9926bSpatrick
1781cf9926bSpatrick Cluster *predC = &clusters[predL];
1791cf9926bSpatrick if (c.size + predC->size > MAX_CLUSTER_SIZE)
1801cf9926bSpatrick continue;
1811cf9926bSpatrick
1821cf9926bSpatrick if (isNewDensityBad(*predC, c))
1831cf9926bSpatrick continue;
1841cf9926bSpatrick
1851cf9926bSpatrick leaders[l] = predL;
1861cf9926bSpatrick mergeClusters(clusters, *predC, predL, c, l);
1871cf9926bSpatrick }
1881cf9926bSpatrick
1891cf9926bSpatrick // Sort remaining non-empty clusters by density.
1901cf9926bSpatrick sorted.clear();
1911cf9926bSpatrick for (int i = 0, e = (int)clusters.size(); i != e; ++i)
1921cf9926bSpatrick if (clusters[i].size > 0)
1931cf9926bSpatrick sorted.push_back(i);
1941cf9926bSpatrick llvm::stable_sort(sorted, [&](int a, int b) {
1951cf9926bSpatrick return clusters[a].getDensity() > clusters[b].getDensity();
1961cf9926bSpatrick });
1971cf9926bSpatrick
1981cf9926bSpatrick DenseMap<const SectionChunk *, int> orderMap;
1991cf9926bSpatrick // Sections will be sorted by increasing order. Absent sections will have
2001cf9926bSpatrick // priority 0 and be placed at the end of sections.
2011cf9926bSpatrick int curOrder = INT_MIN;
2021cf9926bSpatrick for (int leader : sorted) {
2031cf9926bSpatrick for (int i = leader;;) {
2041cf9926bSpatrick orderMap[sections[i]] = curOrder++;
2051cf9926bSpatrick i = clusters[i].next;
2061cf9926bSpatrick if (i == leader)
2071cf9926bSpatrick break;
2081cf9926bSpatrick }
2091cf9926bSpatrick }
210*dfe94b16Srobert if (!ctx.config.printSymbolOrder.empty()) {
2111cf9926bSpatrick std::error_code ec;
212*dfe94b16Srobert raw_fd_ostream os(ctx.config.printSymbolOrder, ec, sys::fs::OF_None);
2131cf9926bSpatrick if (ec) {
214*dfe94b16Srobert error("cannot open " + ctx.config.printSymbolOrder + ": " + ec.message());
2151cf9926bSpatrick return orderMap;
2161cf9926bSpatrick }
2171cf9926bSpatrick // Print the symbols ordered by C3, in the order of increasing curOrder
2181cf9926bSpatrick // Instead of sorting all the orderMap, just repeat the loops above.
2191cf9926bSpatrick for (int leader : sorted)
2201cf9926bSpatrick for (int i = leader;;) {
2211cf9926bSpatrick const SectionChunk *sc = sections[i];
2221cf9926bSpatrick
2231cf9926bSpatrick // Search all the symbols in the file of the section
2241cf9926bSpatrick // and find out a DefinedCOFF symbol with name that is within the
2251cf9926bSpatrick // section.
2261cf9926bSpatrick for (Symbol *sym : sc->file->getSymbols())
2271cf9926bSpatrick if (auto *d = dyn_cast_or_null<DefinedCOFF>(sym))
2281cf9926bSpatrick // Filter out non-COMDAT symbols and section symbols.
2291cf9926bSpatrick if (d->isCOMDAT && !d->getCOFFSymbol().isSection() &&
2301cf9926bSpatrick sc == d->getChunk())
2311cf9926bSpatrick os << sym->getName() << "\n";
2321cf9926bSpatrick i = clusters[i].next;
2331cf9926bSpatrick if (i == leader)
2341cf9926bSpatrick break;
2351cf9926bSpatrick }
2361cf9926bSpatrick }
2371cf9926bSpatrick
2381cf9926bSpatrick return orderMap;
2391cf9926bSpatrick }
2401cf9926bSpatrick
2411cf9926bSpatrick // Sort sections by the profile data provided by /call-graph-ordering-file
2421cf9926bSpatrick //
2431cf9926bSpatrick // This first builds a call graph based on the profile data then merges sections
2441cf9926bSpatrick // according to the C³ heuristic. All clusters are then sorted by a density
2451cf9926bSpatrick // metric to further improve locality.
246*dfe94b16Srobert DenseMap<const SectionChunk *, int>
computeCallGraphProfileOrder(const COFFLinkerContext & ctx)247*dfe94b16Srobert coff::computeCallGraphProfileOrder(const COFFLinkerContext &ctx) {
248*dfe94b16Srobert return CallGraphSort(ctx).run();
2491cf9926bSpatrick }
250