1 //===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H 10 #define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H 11 #include "CSPreInliner.h" 12 #include "ErrorHandling.h" 13 #include "PerfReader.h" 14 #include "ProfiledBinary.h" 15 #include "llvm/IR/DebugInfoMetadata.h" 16 #include "llvm/ProfileData/SampleProfWriter.h" 17 #include <memory> 18 #include <unordered_set> 19 20 using namespace llvm; 21 using namespace sampleprof; 22 23 namespace llvm { 24 namespace sampleprof { 25 26 using ProbeCounterMap = 27 std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>; 28 29 // This base class for profile generation of sample-based PGO. We reuse all 30 // structures relating to function profiles and profile writers as seen in 31 // /ProfileData/SampleProf.h. 32 class ProfileGeneratorBase { 33 34 public: ProfileGeneratorBase(ProfiledBinary * Binary)35 ProfileGeneratorBase(ProfiledBinary *Binary) : Binary(Binary){}; ProfileGeneratorBase(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)36 ProfileGeneratorBase(ProfiledBinary *Binary, 37 const ContextSampleCounterMap *Counters) 38 : Binary(Binary), SampleCounters(Counters){}; ProfileGeneratorBase(ProfiledBinary * Binary,const SampleProfileMap && Profiles)39 ProfileGeneratorBase(ProfiledBinary *Binary, 40 const SampleProfileMap &&Profiles) 41 : Binary(Binary), ProfileMap(std::move(Profiles)){}; 42 43 virtual ~ProfileGeneratorBase() = default; 44 static std::unique_ptr<ProfileGeneratorBase> 45 create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters, 46 bool profileIsCS); 47 static std::unique_ptr<ProfileGeneratorBase> 48 create(ProfiledBinary *Binary, SampleProfileMap &ProfileMap, 49 bool profileIsCS); 50 virtual void generateProfile() = 0; 51 void write(); 52 53 static uint32_t 54 getDuplicationFactor(unsigned Discriminator, 55 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) { 56 return UseFSD ? 1 57 : llvm::DILocation::getDuplicationFactorFromDiscriminator( 58 Discriminator); 59 } 60 61 static uint32_t 62 getBaseDiscriminator(unsigned Discriminator, 63 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) { 64 return UseFSD ? Discriminator 65 : DILocation::getBaseDiscriminatorFromDiscriminator( 66 Discriminator, /* IsFSDiscriminator */ false); 67 } 68 69 static bool UseFSDiscriminator; 70 71 protected: 72 // Use SampleProfileWriter to serialize profile map 73 void write(std::unique_ptr<SampleProfileWriter> Writer, 74 SampleProfileMap &ProfileMap); 75 /* 76 For each region boundary point, mark if it is begin or end (or both) of 77 the region. Boundary points are inclusive. Log the sample count as well 78 so we can use it when we compute the sample count of each disjoint region 79 later. Note that there might be multiple ranges with different sample 80 count that share same begin/end point. We need to accumulate the sample 81 count for the boundary point for such case, because for the example 82 below, 83 84 |<--100-->| 85 |<------200------>| 86 A B C 87 88 sample count for disjoint region [A,B] would be 300. 89 */ 90 void findDisjointRanges(RangeSample &DisjointRanges, 91 const RangeSample &Ranges); 92 93 // Go through each address from range to extract the top frame probe by 94 // looking up in the Address2ProbeMap 95 void extractProbesFromRange(const RangeSample &RangeCounter, 96 ProbeCounterMap &ProbeCounter, 97 bool FindDisjointRanges = true); 98 99 // Helper function for updating body sample for a leaf location in 100 // FunctionProfile 101 void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, 102 const SampleContextFrame &LeafLoc, 103 uint64_t Count); 104 105 void updateFunctionSamples(); 106 107 void updateTotalSamples(); 108 109 void updateCallsiteSamples(); 110 111 StringRef getCalleeNameForAddress(uint64_t TargetAddress); 112 113 void computeSummaryAndThreshold(SampleProfileMap &ProfileMap); 114 115 void calculateAndShowDensity(const SampleProfileMap &Profiles); 116 117 double calculateDensity(const SampleProfileMap &Profiles, 118 uint64_t HotCntThreshold); 119 120 void showDensitySuggestion(double Density); 121 122 void collectProfiledFunctions(); 123 124 bool collectFunctionsFromRawProfile( 125 std::unordered_set<const BinaryFunction *> &ProfiledFunctions); 126 127 // Collect profiled Functions for llvm sample profile input. 128 virtual bool collectFunctionsFromLLVMProfile( 129 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) = 0; 130 131 // Thresholds from profile summary to answer isHotCount/isColdCount queries. 132 uint64_t HotCountThreshold; 133 134 uint64_t ColdCountThreshold; 135 136 ProfiledBinary *Binary = nullptr; 137 138 std::unique_ptr<ProfileSummary> Summary; 139 140 // Used by SampleProfileWriter 141 SampleProfileMap ProfileMap; 142 143 const ContextSampleCounterMap *SampleCounters = nullptr; 144 }; 145 146 class ProfileGenerator : public ProfileGeneratorBase { 147 148 public: ProfileGenerator(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)149 ProfileGenerator(ProfiledBinary *Binary, 150 const ContextSampleCounterMap *Counters) 151 : ProfileGeneratorBase(Binary, Counters){}; ProfileGenerator(ProfiledBinary * Binary,const SampleProfileMap && Profiles)152 ProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles) 153 : ProfileGeneratorBase(Binary, std::move(Profiles)){}; 154 void generateProfile() override; 155 156 private: 157 void generateLineNumBasedProfile(); 158 void generateProbeBasedProfile(); 159 RangeSample preprocessRangeCounter(const RangeSample &RangeCounter); 160 FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName); 161 // Helper function to get the leaf frame's FunctionProfile by traversing the 162 // inline stack and meanwhile it adds the total samples for each frame's 163 // function profile. 164 FunctionSamples & 165 getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec, 166 uint64_t Count); 167 void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter); 168 void 169 populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); 170 void 171 populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter); 172 void populateBoundarySamplesWithProbesForAllFunctions( 173 const BranchSample &BranchCounters); 174 void postProcessProfiles(); 175 void trimColdProfiles(const SampleProfileMap &Profiles, 176 uint64_t ColdCntThreshold); 177 bool collectFunctionsFromLLVMProfile( 178 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override; 179 }; 180 181 class CSProfileGenerator : public ProfileGeneratorBase { 182 public: CSProfileGenerator(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)183 CSProfileGenerator(ProfiledBinary *Binary, 184 const ContextSampleCounterMap *Counters) 185 : ProfileGeneratorBase(Binary, Counters){}; CSProfileGenerator(ProfiledBinary * Binary,SampleProfileMap & Profiles)186 CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &Profiles) 187 : ProfileGeneratorBase(Binary), ContextTracker(Profiles, nullptr){}; 188 void generateProfile() override; 189 190 // Trim the context stack at a given depth. 191 template <typename T> 192 static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) { 193 if (Depth < 0 || static_cast<size_t>(Depth) >= S.size()) 194 return; 195 std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(), 196 S.begin()); 197 S.resize(Depth); 198 } 199 200 // Remove adjacent repeated context sequences up to a given sequence length, 201 // -1 means no size limit. Note that repeated sequences are identified based 202 // on the exact call site, this is finer granularity than function recursion. 203 template <typename T> 204 static void compressRecursionContext(SmallVectorImpl<T> &Context, 205 int32_t CSize = MaxCompressionSize) { 206 uint32_t I = 1; 207 uint32_t HS = static_cast<uint32_t>(Context.size() / 2); 208 uint32_t MaxDedupSize = 209 CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS); 210 auto BeginIter = Context.begin(); 211 // Use an in-place algorithm to save memory copy 212 // End indicates the end location of current iteration's data 213 uint32_t End = 0; 214 // Deduplicate from length 1 to the max possible size of a repeated 215 // sequence. 216 while (I <= MaxDedupSize) { 217 // This is a linear algorithm that deduplicates adjacent repeated 218 // sequences of size I. The deduplication detection runs on a sliding 219 // window whose size is 2*I and it keeps sliding the window to deduplicate 220 // the data inside. Once duplication is detected, deduplicate it by 221 // skipping the right half part of the window, otherwise just copy back 222 // the new one by appending them at the back of End pointer(for the next 223 // iteration). 224 // 225 // For example: 226 // Input: [a1, a2, b1, b2] 227 // (Added index to distinguish the same char, the origin is [a, a, b, 228 // b], the size of the dedup window is 2(I = 1) at the beginning) 229 // 230 // 1) The initial status is a dummy window[null, a1], then just copy the 231 // right half of the window(End = 0), then slide the window. 232 // Result: [a1], a2, b1, b2 (End points to the element right before ], 233 // after ] is the data of the previous iteration) 234 // 235 // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of 236 // the window i.e the duplication happen. Only slide the window. 237 // Result: [a1], a2, b1, b2 238 // 239 // 3) Next window is [a2, b1], copy the right half of the window(b1 is 240 // new) to the End and slide the window. 241 // Result: [a1, b1], b1, b2 242 // 243 // 4) Next window is [b1, b2], same to 2), skip b2. 244 // Result: [a1, b1], b1, b2 245 // After resize, it will be [a, b] 246 247 // Use pointers like below to do comparison inside the window 248 // [a b c a b c] 249 // | | | | | 250 // LeftBoundary Left Right Left+I Right+I 251 // A duplication found if Left < LeftBoundry. 252 253 int32_t Right = I - 1; 254 End = I; 255 int32_t LeftBoundary = 0; 256 while (Right + I < Context.size()) { 257 // To avoids scanning a part of a sequence repeatedly, it finds out 258 // the common suffix of two hald in the window. The common suffix will 259 // serve as the common prefix of next possible pair of duplicate 260 // sequences. The non-common part will be ignored and never scanned 261 // again. 262 263 // For example. 264 // Input: [a, b1], c1, b2, c2 265 // I = 2 266 // 267 // 1) For the window [a, b1, c1, b2], non-common-suffix for the right 268 // part is 'c1', copy it and only slide the window 1 step. 269 // Result: [a, b1, c1], b2, c2 270 // 271 // 2) Next window is [b1, c1, b2, c2], so duplication happen. 272 // Result after resize: [a, b, c] 273 274 int32_t Left = Right; 275 while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) { 276 // Find the longest suffix inside the window. When stops, Left points 277 // at the diverging point in the current sequence. 278 Left--; 279 } 280 281 bool DuplicationFound = (Left < LeftBoundary); 282 // Don't need to recheck the data before Right 283 LeftBoundary = Right + 1; 284 if (DuplicationFound) { 285 // Duplication found, skip right half of the window. 286 Right += I; 287 } else { 288 // Copy the non-common-suffix part of the adjacent sequence. 289 std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1, 290 BeginIter + End); 291 End += Left + I - Right; 292 // Only slide the window by the size of non-common-suffix 293 Right = Left + I; 294 } 295 } 296 // Don't forget the remaining part that's not scanned. 297 std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End); 298 End += Context.size() - Right - 1; 299 I++; 300 Context.resize(End); 301 MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize); 302 } 303 } 304 305 private: 306 void generateLineNumBasedProfile(); 307 308 FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode, 309 bool WasLeafInlined = false); 310 311 // Lookup or create ContextTrieNode for the context, FunctionSamples is 312 // created inside this function. 313 ContextTrieNode *getOrCreateContextNode(const SampleContextFrames Context, 314 bool WasLeafInlined = false); 315 316 // For profiled only functions, on-demand compute their inline context 317 // function byte size which is used by the pre-inliner. 318 void computeSizeForProfiledFunctions(); 319 // Post processing for profiles before writing out, such as mermining 320 // and trimming cold profiles, running preinliner on profiles. 321 void postProcessProfiles(); 322 323 void populateBodySamplesForFunction(FunctionSamples &FunctionProfile, 324 const RangeSample &RangeCounters); 325 326 void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode, 327 const BranchSample &BranchCounters); 328 329 void populateInferredFunctionSamples(ContextTrieNode &Node); 330 331 void updateFunctionSamples(); 332 333 void generateProbeBasedProfile(); 334 335 // Fill in function body samples from probes 336 void populateBodySamplesWithProbes(const RangeSample &RangeCounter, 337 const AddrBasedCtxKey *CtxKey); 338 // Fill in boundary samples for a call probe 339 void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, 340 const AddrBasedCtxKey *CtxKey); 341 342 ContextTrieNode * 343 getContextNodeForLeafProbe(const AddrBasedCtxKey *CtxKey, 344 const MCDecodedPseudoProbe *LeafProbe); 345 346 // Helper function to get FunctionSamples for the leaf probe 347 FunctionSamples & 348 getFunctionProfileForLeafProbe(const AddrBasedCtxKey *CtxKey, 349 const MCDecodedPseudoProbe *LeafProbe); 350 351 void convertToProfileMap(ContextTrieNode &Node, 352 SampleContextFrameVector &Context); 353 354 void convertToProfileMap(); 355 356 void computeSummaryAndThreshold(); 357 358 bool collectFunctionsFromLLVMProfile( 359 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override; 360 361 void initializeMissingFrameInferrer(); 362 363 // Given an input `Context`, output `NewContext` with inferred missing tail 364 // call frames. 365 void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context, 366 SmallVectorImpl<uint64_t> &NewContext); 367 getRootContext()368 ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); }; 369 370 // The container for holding the FunctionSamples used by context trie. 371 std::list<FunctionSamples> FSamplesList; 372 373 // Underlying context table serves for sample profile writer. 374 std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts; 375 376 SampleContextTracker ContextTracker; 377 378 bool IsProfileValidOnTrie = true; 379 380 public: 381 // Deduplicate adjacent repeated context sequences up to a given sequence 382 // length. -1 means no size limit. 383 static int32_t MaxCompressionSize; 384 static int MaxContextDepth; 385 }; 386 387 } // end namespace sampleprof 388 } // end namespace llvm 389 390 #endif 391