1*da58b97aSjoerg //===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===// 2*da58b97aSjoerg // 3*da58b97aSjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*da58b97aSjoerg // See https://llvm.org/LICENSE.txt for license information. 5*da58b97aSjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*da58b97aSjoerg // 7*da58b97aSjoerg //===----------------------------------------------------------------------===// 8*da58b97aSjoerg 9*da58b97aSjoerg #ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H 10*da58b97aSjoerg #define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H 11*da58b97aSjoerg #include "CSPreInliner.h" 12*da58b97aSjoerg #include "ErrorHandling.h" 13*da58b97aSjoerg #include "PerfReader.h" 14*da58b97aSjoerg #include "ProfiledBinary.h" 15*da58b97aSjoerg #include "llvm/ProfileData/SampleProfWriter.h" 16*da58b97aSjoerg #include <memory> 17*da58b97aSjoerg #include <unordered_set> 18*da58b97aSjoerg 19*da58b97aSjoerg using namespace llvm; 20*da58b97aSjoerg using namespace sampleprof; 21*da58b97aSjoerg 22*da58b97aSjoerg namespace llvm { 23*da58b97aSjoerg namespace sampleprof { 24*da58b97aSjoerg 25*da58b97aSjoerg class ProfileGenerator { 26*da58b97aSjoerg 27*da58b97aSjoerg public: ProfileGenerator()28*da58b97aSjoerg ProfileGenerator(){}; 29*da58b97aSjoerg virtual ~ProfileGenerator() = default; 30*da58b97aSjoerg static std::unique_ptr<ProfileGenerator> 31*da58b97aSjoerg create(const BinarySampleCounterMap &BinarySampleCounters, 32*da58b97aSjoerg enum PerfScriptType SampleType); 33*da58b97aSjoerg virtual void generateProfile() = 0; 34*da58b97aSjoerg // Use SampleProfileWriter to serialize profile map 35*da58b97aSjoerg virtual void write(std::unique_ptr<SampleProfileWriter> Writer, 36*da58b97aSjoerg StringMap<FunctionSamples> &ProfileMap); 37*da58b97aSjoerg void write(); 38*da58b97aSjoerg 39*da58b97aSjoerg protected: 40*da58b97aSjoerg /* 41*da58b97aSjoerg For each region boundary point, mark if it is begin or end (or both) of 42*da58b97aSjoerg the region. Boundary points are inclusive. Log the sample count as well 43*da58b97aSjoerg so we can use it when we compute the sample count of each disjoint region 44*da58b97aSjoerg later. Note that there might be multiple ranges with different sample 45*da58b97aSjoerg count that share same begin/end point. We need to accumulate the sample 46*da58b97aSjoerg count for the boundary point for such case, because for the example 47*da58b97aSjoerg below, 48*da58b97aSjoerg 49*da58b97aSjoerg |<--100-->| 50*da58b97aSjoerg |<------200------>| 51*da58b97aSjoerg A B C 52*da58b97aSjoerg 53*da58b97aSjoerg sample count for disjoint region [A,B] would be 300. 54*da58b97aSjoerg */ 55*da58b97aSjoerg void findDisjointRanges(RangeSample &DisjointRanges, 56*da58b97aSjoerg const RangeSample &Ranges); 57*da58b97aSjoerg 58*da58b97aSjoerg // Used by SampleProfileWriter 59*da58b97aSjoerg StringMap<FunctionSamples> ProfileMap; 60*da58b97aSjoerg }; 61*da58b97aSjoerg 62*da58b97aSjoerg class CSProfileGenerator : public ProfileGenerator { 63*da58b97aSjoerg protected: 64*da58b97aSjoerg const BinarySampleCounterMap &BinarySampleCounters; 65*da58b97aSjoerg 66*da58b97aSjoerg public: CSProfileGenerator(const BinarySampleCounterMap & Counters)67*da58b97aSjoerg CSProfileGenerator(const BinarySampleCounterMap &Counters) 68*da58b97aSjoerg : BinarySampleCounters(Counters){}; 69*da58b97aSjoerg 70*da58b97aSjoerg public: 71*da58b97aSjoerg void generateProfile() override; 72*da58b97aSjoerg 73*da58b97aSjoerg // Remove adjacent repeated context sequences up to a given sequence length, 74*da58b97aSjoerg // -1 means no size limit. Note that repeated sequences are identified based 75*da58b97aSjoerg // on the exact call site, this is finer granularity than function recursion. 76*da58b97aSjoerg template <typename T> 77*da58b97aSjoerg static void compressRecursionContext(SmallVectorImpl<T> &Context, 78*da58b97aSjoerg int32_t CSize = MaxCompressionSize) { 79*da58b97aSjoerg uint32_t I = 1; 80*da58b97aSjoerg uint32_t HS = static_cast<uint32_t>(Context.size() / 2); 81*da58b97aSjoerg uint32_t MaxDedupSize = 82*da58b97aSjoerg CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS); 83*da58b97aSjoerg auto BeginIter = Context.begin(); 84*da58b97aSjoerg // Use an in-place algorithm to save memory copy 85*da58b97aSjoerg // End indicates the end location of current iteration's data 86*da58b97aSjoerg uint32_t End = 0; 87*da58b97aSjoerg // Deduplicate from length 1 to the max possible size of a repeated 88*da58b97aSjoerg // sequence. 89*da58b97aSjoerg while (I <= MaxDedupSize) { 90*da58b97aSjoerg // This is a linear algorithm that deduplicates adjacent repeated 91*da58b97aSjoerg // sequences of size I. The deduplication detection runs on a sliding 92*da58b97aSjoerg // window whose size is 2*I and it keeps sliding the window to deduplicate 93*da58b97aSjoerg // the data inside. Once duplication is detected, deduplicate it by 94*da58b97aSjoerg // skipping the right half part of the window, otherwise just copy back 95*da58b97aSjoerg // the new one by appending them at the back of End pointer(for the next 96*da58b97aSjoerg // iteration). 97*da58b97aSjoerg // 98*da58b97aSjoerg // For example: 99*da58b97aSjoerg // Input: [a1, a2, b1, b2] 100*da58b97aSjoerg // (Added index to distinguish the same char, the origin is [a, a, b, 101*da58b97aSjoerg // b], the size of the dedup window is 2(I = 1) at the beginning) 102*da58b97aSjoerg // 103*da58b97aSjoerg // 1) The initial status is a dummy window[null, a1], then just copy the 104*da58b97aSjoerg // right half of the window(End = 0), then slide the window. 105*da58b97aSjoerg // Result: [a1], a2, b1, b2 (End points to the element right before ], 106*da58b97aSjoerg // after ] is the data of the previous iteration) 107*da58b97aSjoerg // 108*da58b97aSjoerg // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of 109*da58b97aSjoerg // the window i.e the duplication happen. Only slide the window. 110*da58b97aSjoerg // Result: [a1], a2, b1, b2 111*da58b97aSjoerg // 112*da58b97aSjoerg // 3) Next window is [a2, b1], copy the right half of the window(b1 is 113*da58b97aSjoerg // new) to the End and slide the window. 114*da58b97aSjoerg // Result: [a1, b1], b1, b2 115*da58b97aSjoerg // 116*da58b97aSjoerg // 4) Next window is [b1, b2], same to 2), skip b2. 117*da58b97aSjoerg // Result: [a1, b1], b1, b2 118*da58b97aSjoerg // After resize, it will be [a, b] 119*da58b97aSjoerg 120*da58b97aSjoerg // Use pointers like below to do comparison inside the window 121*da58b97aSjoerg // [a b c a b c] 122*da58b97aSjoerg // | | | | | 123*da58b97aSjoerg // LeftBoundary Left Right Left+I Right+I 124*da58b97aSjoerg // A duplication found if Left < LeftBoundry. 125*da58b97aSjoerg 126*da58b97aSjoerg int32_t Right = I - 1; 127*da58b97aSjoerg End = I; 128*da58b97aSjoerg int32_t LeftBoundary = 0; 129*da58b97aSjoerg while (Right + I < Context.size()) { 130*da58b97aSjoerg // To avoids scanning a part of a sequence repeatedly, it finds out 131*da58b97aSjoerg // the common suffix of two hald in the window. The common suffix will 132*da58b97aSjoerg // serve as the common prefix of next possible pair of duplicate 133*da58b97aSjoerg // sequences. The non-common part will be ignored and never scanned 134*da58b97aSjoerg // again. 135*da58b97aSjoerg 136*da58b97aSjoerg // For example. 137*da58b97aSjoerg // Input: [a, b1], c1, b2, c2 138*da58b97aSjoerg // I = 2 139*da58b97aSjoerg // 140*da58b97aSjoerg // 1) For the window [a, b1, c1, b2], non-common-suffix for the right 141*da58b97aSjoerg // part is 'c1', copy it and only slide the window 1 step. 142*da58b97aSjoerg // Result: [a, b1, c1], b2, c2 143*da58b97aSjoerg // 144*da58b97aSjoerg // 2) Next window is [b1, c1, b2, c2], so duplication happen. 145*da58b97aSjoerg // Result after resize: [a, b, c] 146*da58b97aSjoerg 147*da58b97aSjoerg int32_t Left = Right; 148*da58b97aSjoerg while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) { 149*da58b97aSjoerg // Find the longest suffix inside the window. When stops, Left points 150*da58b97aSjoerg // at the diverging point in the current sequence. 151*da58b97aSjoerg Left--; 152*da58b97aSjoerg } 153*da58b97aSjoerg 154*da58b97aSjoerg bool DuplicationFound = (Left < LeftBoundary); 155*da58b97aSjoerg // Don't need to recheck the data before Right 156*da58b97aSjoerg LeftBoundary = Right + 1; 157*da58b97aSjoerg if (DuplicationFound) { 158*da58b97aSjoerg // Duplication found, skip right half of the window. 159*da58b97aSjoerg Right += I; 160*da58b97aSjoerg } else { 161*da58b97aSjoerg // Copy the non-common-suffix part of the adjacent sequence. 162*da58b97aSjoerg std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1, 163*da58b97aSjoerg BeginIter + End); 164*da58b97aSjoerg End += Left + I - Right; 165*da58b97aSjoerg // Only slide the window by the size of non-common-suffix 166*da58b97aSjoerg Right = Left + I; 167*da58b97aSjoerg } 168*da58b97aSjoerg } 169*da58b97aSjoerg // Don't forget the remaining part that's not scanned. 170*da58b97aSjoerg std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End); 171*da58b97aSjoerg End += Context.size() - Right - 1; 172*da58b97aSjoerg I++; 173*da58b97aSjoerg Context.resize(End); 174*da58b97aSjoerg MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize); 175*da58b97aSjoerg } 176*da58b97aSjoerg } 177*da58b97aSjoerg 178*da58b97aSjoerg protected: 179*da58b97aSjoerg // Lookup or create FunctionSamples for the context 180*da58b97aSjoerg FunctionSamples &getFunctionProfileForContext(StringRef ContextId, 181*da58b97aSjoerg bool WasLeafInlined = false); 182*da58b97aSjoerg // Post processing for profiles before writing out, such as mermining 183*da58b97aSjoerg // and trimming cold profiles, running preinliner on profiles. 184*da58b97aSjoerg void postProcessProfiles(); 185*da58b97aSjoerg void computeSummaryAndThreshold(); 186*da58b97aSjoerg void write(std::unique_ptr<SampleProfileWriter> Writer, 187*da58b97aSjoerg StringMap<FunctionSamples> &ProfileMap) override; 188*da58b97aSjoerg 189*da58b97aSjoerg // Thresholds from profile summary to answer isHotCount/isColdCount queries. 190*da58b97aSjoerg uint64_t HotCountThreshold; 191*da58b97aSjoerg uint64_t ColdCountThreshold; 192*da58b97aSjoerg 193*da58b97aSjoerg // String table owning context strings created from profile generation. 194*da58b97aSjoerg std::unordered_set<std::string> ContextStrings; 195*da58b97aSjoerg 196*da58b97aSjoerg private: 197*da58b97aSjoerg // Helper function for updating body sample for a leaf location in 198*da58b97aSjoerg // FunctionProfile 199*da58b97aSjoerg void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, 200*da58b97aSjoerg const FrameLocation &LeafLoc, 201*da58b97aSjoerg uint64_t Count); 202*da58b97aSjoerg void populateFunctionBodySamples(FunctionSamples &FunctionProfile, 203*da58b97aSjoerg const RangeSample &RangeCounters, 204*da58b97aSjoerg ProfiledBinary *Binary); 205*da58b97aSjoerg void populateFunctionBoundarySamples(StringRef ContextId, 206*da58b97aSjoerg FunctionSamples &FunctionProfile, 207*da58b97aSjoerg const BranchSample &BranchCounters, 208*da58b97aSjoerg ProfiledBinary *Binary); 209*da58b97aSjoerg void populateInferredFunctionSamples(); 210*da58b97aSjoerg 211*da58b97aSjoerg public: 212*da58b97aSjoerg // Deduplicate adjacent repeated context sequences up to a given sequence 213*da58b97aSjoerg // length. -1 means no size limit. 214*da58b97aSjoerg static int32_t MaxCompressionSize; 215*da58b97aSjoerg }; 216*da58b97aSjoerg 217*da58b97aSjoerg using ProbeCounterMap = std::unordered_map<const PseudoProbe *, uint64_t>; 218*da58b97aSjoerg 219*da58b97aSjoerg class PseudoProbeCSProfileGenerator : public CSProfileGenerator { 220*da58b97aSjoerg 221*da58b97aSjoerg public: PseudoProbeCSProfileGenerator(const BinarySampleCounterMap & Counters)222*da58b97aSjoerg PseudoProbeCSProfileGenerator(const BinarySampleCounterMap &Counters) 223*da58b97aSjoerg : CSProfileGenerator(Counters) {} 224*da58b97aSjoerg void generateProfile() override; 225*da58b97aSjoerg 226*da58b97aSjoerg private: 227*da58b97aSjoerg // Go through each address from range to extract the top frame probe by 228*da58b97aSjoerg // looking up in the Address2ProbeMap 229*da58b97aSjoerg void extractProbesFromRange(const RangeSample &RangeCounter, 230*da58b97aSjoerg ProbeCounterMap &ProbeCounter, 231*da58b97aSjoerg ProfiledBinary *Binary); 232*da58b97aSjoerg // Fill in function body samples from probes 233*da58b97aSjoerg void 234*da58b97aSjoerg populateBodySamplesWithProbes(const RangeSample &RangeCounter, 235*da58b97aSjoerg SmallVectorImpl<std::string> &ContextStrStack, 236*da58b97aSjoerg ProfiledBinary *Binary); 237*da58b97aSjoerg // Fill in boundary samples for a call probe 238*da58b97aSjoerg void populateBoundarySamplesWithProbes( 239*da58b97aSjoerg const BranchSample &BranchCounter, 240*da58b97aSjoerg SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary); 241*da58b97aSjoerg // Helper function to get FunctionSamples for the leaf inlined context 242*da58b97aSjoerg FunctionSamples & 243*da58b97aSjoerg getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack, 244*da58b97aSjoerg const PseudoProbeFuncDesc *LeafFuncDesc, 245*da58b97aSjoerg bool WasLeafInlined); 246*da58b97aSjoerg // Helper function to get FunctionSamples for the leaf probe 247*da58b97aSjoerg FunctionSamples & 248*da58b97aSjoerg getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack, 249*da58b97aSjoerg const PseudoProbe *LeafProbe, 250*da58b97aSjoerg ProfiledBinary *Binary); 251*da58b97aSjoerg }; 252*da58b97aSjoerg 253*da58b97aSjoerg } // end namespace sampleprof 254*da58b97aSjoerg } // end namespace llvm 255*da58b97aSjoerg 256*da58b97aSjoerg #endif 257