10eae32dcSDimitry Andric //===- CodeLayout.h - Code layout/placement algorithms  ---------*- C++ -*-===//
20eae32dcSDimitry Andric //
30eae32dcSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40eae32dcSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50eae32dcSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60eae32dcSDimitry Andric //
70eae32dcSDimitry Andric //===----------------------------------------------------------------------===//
80eae32dcSDimitry Andric //
90eae32dcSDimitry Andric /// \file
100eae32dcSDimitry Andric /// Declares methods and data structures for code layout algorithms.
110eae32dcSDimitry Andric //
120eae32dcSDimitry Andric //===----------------------------------------------------------------------===//
130eae32dcSDimitry Andric 
140eae32dcSDimitry Andric #ifndef LLVM_TRANSFORMS_UTILS_CODELAYOUT_H
150eae32dcSDimitry Andric #define LLVM_TRANSFORMS_UTILS_CODELAYOUT_H
160eae32dcSDimitry Andric 
17*5f757f3fSDimitry Andric #include "llvm/ADT/ArrayRef.h"
180eae32dcSDimitry Andric 
19*5f757f3fSDimitry Andric #include <utility>
200eae32dcSDimitry Andric #include <vector>
210eae32dcSDimitry Andric 
22*5f757f3fSDimitry Andric namespace llvm::codelayout {
230eae32dcSDimitry Andric 
24bdd1243dSDimitry Andric using EdgeT = std::pair<uint64_t, uint64_t>;
25*5f757f3fSDimitry Andric 
26*5f757f3fSDimitry Andric struct EdgeCount {
27*5f757f3fSDimitry Andric   uint64_t src;
28*5f757f3fSDimitry Andric   uint64_t dst;
29*5f757f3fSDimitry Andric   uint64_t count;
30*5f757f3fSDimitry Andric };
31bdd1243dSDimitry Andric 
320eae32dcSDimitry Andric /// Find a layout of nodes (basic blocks) of a given CFG optimizing jump
330eae32dcSDimitry Andric /// locality and thus processor I-cache utilization. This is achieved via
340eae32dcSDimitry Andric /// increasing the number of fall-through jumps and co-locating frequently
350eae32dcSDimitry Andric /// executed nodes together.
360eae32dcSDimitry Andric /// The nodes are assumed to be indexed by integers from [0, |V|) so that the
370eae32dcSDimitry Andric /// current order is the identity permutation.
380eae32dcSDimitry Andric /// \p NodeSizes: The sizes of the nodes (in bytes).
390eae32dcSDimitry Andric /// \p NodeCounts: The execution counts of the nodes in the profile.
400eae32dcSDimitry Andric /// \p EdgeCounts: The execution counts of every edge (jump) in the profile. The
410eae32dcSDimitry Andric ///    map also defines the edges in CFG and should include 0-count edges.
420eae32dcSDimitry Andric /// \returns The best block order found.
43*5f757f3fSDimitry Andric std::vector<uint64_t> computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,
44*5f757f3fSDimitry Andric                                           ArrayRef<uint64_t> NodeCounts,
45*5f757f3fSDimitry Andric                                           ArrayRef<EdgeCount> EdgeCounts);
460eae32dcSDimitry Andric 
470eae32dcSDimitry Andric /// Estimate the "quality" of a given node order in CFG. The higher the score,
480eae32dcSDimitry Andric /// the better the order is. The score is designed to reflect the locality of
490eae32dcSDimitry Andric /// the given order, which is anti-correlated with the number of I-cache misses
500eae32dcSDimitry Andric /// in a typical execution of the function.
51*5f757f3fSDimitry Andric double calcExtTspScore(ArrayRef<uint64_t> Order, ArrayRef<uint64_t> NodeSizes,
52*5f757f3fSDimitry Andric                        ArrayRef<uint64_t> NodeCounts,
53*5f757f3fSDimitry Andric                        ArrayRef<EdgeCount> EdgeCounts);
54bdd1243dSDimitry Andric 
55bdd1243dSDimitry Andric /// Estimate the "quality" of the current node order in CFG.
56*5f757f3fSDimitry Andric double calcExtTspScore(ArrayRef<uint64_t> NodeSizes,
57*5f757f3fSDimitry Andric                        ArrayRef<uint64_t> NodeCounts,
58*5f757f3fSDimitry Andric                        ArrayRef<EdgeCount> EdgeCounts);
590eae32dcSDimitry Andric 
60*5f757f3fSDimitry Andric /// Algorithm-specific params for Cache-Directed Sort. The values are tuned for
61*5f757f3fSDimitry Andric /// the best performance of large-scale front-end bound binaries.
62*5f757f3fSDimitry Andric struct CDSortConfig {
63*5f757f3fSDimitry Andric   /// The size of the cache.
64*5f757f3fSDimitry Andric   unsigned CacheEntries = 16;
65*5f757f3fSDimitry Andric   /// The size of a line in the cache.
66*5f757f3fSDimitry Andric   unsigned CacheSize = 2048;
67*5f757f3fSDimitry Andric   /// The maximum size of a chain to create.
68*5f757f3fSDimitry Andric   unsigned MaxChainSize = 128;
69*5f757f3fSDimitry Andric   /// The power exponent for the distance-based locality.
70*5f757f3fSDimitry Andric   double DistancePower = 0.25;
71*5f757f3fSDimitry Andric   /// The scale factor for the frequency-based locality.
72*5f757f3fSDimitry Andric   double FrequencyScale = 0.25;
73*5f757f3fSDimitry Andric };
74*5f757f3fSDimitry Andric 
75*5f757f3fSDimitry Andric /// Apply a Cache-Directed Sort for functions represented by a call graph.
76*5f757f3fSDimitry Andric /// The placement is done by optimizing the call locality by co-locating
77*5f757f3fSDimitry Andric /// frequently executed functions.
78*5f757f3fSDimitry Andric /// \p FuncSizes: The sizes of the nodes (in bytes).
79*5f757f3fSDimitry Andric /// \p FuncCounts: The execution counts of the nodes in the profile.
80*5f757f3fSDimitry Andric /// \p CallCounts: The execution counts of every edge (jump) in the profile. The
81*5f757f3fSDimitry Andric ///    map also defines the edges in CFG and should include 0-count edges.
82*5f757f3fSDimitry Andric /// \p CallOffsets: The offsets of the calls from their source nodes.
83*5f757f3fSDimitry Andric /// \returns The best function order found.
84*5f757f3fSDimitry Andric std::vector<uint64_t> computeCacheDirectedLayout(
85*5f757f3fSDimitry Andric     ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts,
86*5f757f3fSDimitry Andric     ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets);
87*5f757f3fSDimitry Andric 
88*5f757f3fSDimitry Andric /// Apply a Cache-Directed Sort with a custom config.
89*5f757f3fSDimitry Andric std::vector<uint64_t> computeCacheDirectedLayout(
90*5f757f3fSDimitry Andric     const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes,
91*5f757f3fSDimitry Andric     ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts,
92*5f757f3fSDimitry Andric     ArrayRef<uint64_t> CallOffsets);
93*5f757f3fSDimitry Andric 
94*5f757f3fSDimitry Andric } // namespace llvm::codelayout
950eae32dcSDimitry Andric 
960eae32dcSDimitry Andric #endif // LLVM_TRANSFORMS_UTILS_CODELAYOUT_H
97