10eae32dcSDimitry Andric //===- CodeLayout.h - Code layout/placement algorithms ---------*- C++ -*-===// 20eae32dcSDimitry Andric // 30eae32dcSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40eae32dcSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50eae32dcSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60eae32dcSDimitry Andric // 70eae32dcSDimitry Andric //===----------------------------------------------------------------------===// 80eae32dcSDimitry Andric // 90eae32dcSDimitry Andric /// \file 100eae32dcSDimitry Andric /// Declares methods and data structures for code layout algorithms. 110eae32dcSDimitry Andric // 120eae32dcSDimitry Andric //===----------------------------------------------------------------------===// 130eae32dcSDimitry Andric 140eae32dcSDimitry Andric #ifndef LLVM_TRANSFORMS_UTILS_CODELAYOUT_H 150eae32dcSDimitry Andric #define LLVM_TRANSFORMS_UTILS_CODELAYOUT_H 160eae32dcSDimitry Andric 17*5f757f3fSDimitry Andric #include "llvm/ADT/ArrayRef.h" 180eae32dcSDimitry Andric 19*5f757f3fSDimitry Andric #include <utility> 200eae32dcSDimitry Andric #include <vector> 210eae32dcSDimitry Andric 22*5f757f3fSDimitry Andric namespace llvm::codelayout { 230eae32dcSDimitry Andric 24bdd1243dSDimitry Andric using EdgeT = std::pair<uint64_t, uint64_t>; 25*5f757f3fSDimitry Andric 26*5f757f3fSDimitry Andric struct EdgeCount { 27*5f757f3fSDimitry Andric uint64_t src; 28*5f757f3fSDimitry Andric uint64_t dst; 29*5f757f3fSDimitry Andric uint64_t count; 30*5f757f3fSDimitry Andric }; 31bdd1243dSDimitry Andric 320eae32dcSDimitry Andric /// Find a layout of nodes (basic blocks) of a given CFG optimizing jump 330eae32dcSDimitry Andric /// locality and thus processor I-cache utilization. This is achieved via 340eae32dcSDimitry Andric /// increasing the number of fall-through jumps and co-locating frequently 350eae32dcSDimitry Andric /// executed nodes together. 360eae32dcSDimitry Andric /// The nodes are assumed to be indexed by integers from [0, |V|) so that the 370eae32dcSDimitry Andric /// current order is the identity permutation. 380eae32dcSDimitry Andric /// \p NodeSizes: The sizes of the nodes (in bytes). 390eae32dcSDimitry Andric /// \p NodeCounts: The execution counts of the nodes in the profile. 400eae32dcSDimitry Andric /// \p EdgeCounts: The execution counts of every edge (jump) in the profile. The 410eae32dcSDimitry Andric /// map also defines the edges in CFG and should include 0-count edges. 420eae32dcSDimitry Andric /// \returns The best block order found. 43*5f757f3fSDimitry Andric std::vector<uint64_t> computeExtTspLayout(ArrayRef<uint64_t> NodeSizes, 44*5f757f3fSDimitry Andric ArrayRef<uint64_t> NodeCounts, 45*5f757f3fSDimitry Andric ArrayRef<EdgeCount> EdgeCounts); 460eae32dcSDimitry Andric 470eae32dcSDimitry Andric /// Estimate the "quality" of a given node order in CFG. The higher the score, 480eae32dcSDimitry Andric /// the better the order is. The score is designed to reflect the locality of 490eae32dcSDimitry Andric /// the given order, which is anti-correlated with the number of I-cache misses 500eae32dcSDimitry Andric /// in a typical execution of the function. 51*5f757f3fSDimitry Andric double calcExtTspScore(ArrayRef<uint64_t> Order, ArrayRef<uint64_t> NodeSizes, 52*5f757f3fSDimitry Andric ArrayRef<uint64_t> NodeCounts, 53*5f757f3fSDimitry Andric ArrayRef<EdgeCount> EdgeCounts); 54bdd1243dSDimitry Andric 55bdd1243dSDimitry Andric /// Estimate the "quality" of the current node order in CFG. 56*5f757f3fSDimitry Andric double calcExtTspScore(ArrayRef<uint64_t> NodeSizes, 57*5f757f3fSDimitry Andric ArrayRef<uint64_t> NodeCounts, 58*5f757f3fSDimitry Andric ArrayRef<EdgeCount> EdgeCounts); 590eae32dcSDimitry Andric 60*5f757f3fSDimitry Andric /// Algorithm-specific params for Cache-Directed Sort. The values are tuned for 61*5f757f3fSDimitry Andric /// the best performance of large-scale front-end bound binaries. 62*5f757f3fSDimitry Andric struct CDSortConfig { 63*5f757f3fSDimitry Andric /// The size of the cache. 64*5f757f3fSDimitry Andric unsigned CacheEntries = 16; 65*5f757f3fSDimitry Andric /// The size of a line in the cache. 66*5f757f3fSDimitry Andric unsigned CacheSize = 2048; 67*5f757f3fSDimitry Andric /// The maximum size of a chain to create. 68*5f757f3fSDimitry Andric unsigned MaxChainSize = 128; 69*5f757f3fSDimitry Andric /// The power exponent for the distance-based locality. 70*5f757f3fSDimitry Andric double DistancePower = 0.25; 71*5f757f3fSDimitry Andric /// The scale factor for the frequency-based locality. 72*5f757f3fSDimitry Andric double FrequencyScale = 0.25; 73*5f757f3fSDimitry Andric }; 74*5f757f3fSDimitry Andric 75*5f757f3fSDimitry Andric /// Apply a Cache-Directed Sort for functions represented by a call graph. 76*5f757f3fSDimitry Andric /// The placement is done by optimizing the call locality by co-locating 77*5f757f3fSDimitry Andric /// frequently executed functions. 78*5f757f3fSDimitry Andric /// \p FuncSizes: The sizes of the nodes (in bytes). 79*5f757f3fSDimitry Andric /// \p FuncCounts: The execution counts of the nodes in the profile. 80*5f757f3fSDimitry Andric /// \p CallCounts: The execution counts of every edge (jump) in the profile. The 81*5f757f3fSDimitry Andric /// map also defines the edges in CFG and should include 0-count edges. 82*5f757f3fSDimitry Andric /// \p CallOffsets: The offsets of the calls from their source nodes. 83*5f757f3fSDimitry Andric /// \returns The best function order found. 84*5f757f3fSDimitry Andric std::vector<uint64_t> computeCacheDirectedLayout( 85*5f757f3fSDimitry Andric ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts, 86*5f757f3fSDimitry Andric ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets); 87*5f757f3fSDimitry Andric 88*5f757f3fSDimitry Andric /// Apply a Cache-Directed Sort with a custom config. 89*5f757f3fSDimitry Andric std::vector<uint64_t> computeCacheDirectedLayout( 90*5f757f3fSDimitry Andric const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes, 91*5f757f3fSDimitry Andric ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts, 92*5f757f3fSDimitry Andric ArrayRef<uint64_t> CallOffsets); 93*5f757f3fSDimitry Andric 94*5f757f3fSDimitry Andric } // namespace llvm::codelayout 950eae32dcSDimitry Andric 960eae32dcSDimitry Andric #endif // LLVM_TRANSFORMS_UTILS_CODELAYOUT_H 97