1 //===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
10 #define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
11 #include "CSPreInliner.h"
12 #include "ErrorHandling.h"
13 #include "PerfReader.h"
14 #include "ProfiledBinary.h"
15 #include "llvm/IR/DebugInfoMetadata.h"
16 #include "llvm/ProfileData/SampleProfWriter.h"
17 #include <memory>
18 #include <unordered_set>
19 
20 using namespace llvm;
21 using namespace sampleprof;
22 
23 namespace llvm {
24 namespace sampleprof {
25 
26 using ProbeCounterMap =
27     std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
28 
29 // This base class for profile generation of sample-based PGO. We reuse all
30 // structures relating to function profiles and profile writers as seen in
31 // /ProfileData/SampleProf.h.
32 class ProfileGeneratorBase {
33 
34 public:
ProfileGeneratorBase(ProfiledBinary * Binary)35   ProfileGeneratorBase(ProfiledBinary *Binary) : Binary(Binary){};
ProfileGeneratorBase(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)36   ProfileGeneratorBase(ProfiledBinary *Binary,
37                        const ContextSampleCounterMap *Counters)
38       : Binary(Binary), SampleCounters(Counters){};
ProfileGeneratorBase(ProfiledBinary * Binary,const SampleProfileMap && Profiles)39   ProfileGeneratorBase(ProfiledBinary *Binary,
40                        const SampleProfileMap &&Profiles)
41       : Binary(Binary), ProfileMap(std::move(Profiles)){};
42 
43   virtual ~ProfileGeneratorBase() = default;
44   static std::unique_ptr<ProfileGeneratorBase>
45   create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters,
46          bool profileIsCS);
47   static std::unique_ptr<ProfileGeneratorBase>
48   create(ProfiledBinary *Binary, SampleProfileMap &ProfileMap,
49          bool profileIsCS);
50   virtual void generateProfile() = 0;
51   void write();
52 
53   static uint32_t
54   getDuplicationFactor(unsigned Discriminator,
55                        bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
56     return UseFSD ? 1
57                   : llvm::DILocation::getDuplicationFactorFromDiscriminator(
58                         Discriminator);
59   }
60 
61   static uint32_t
62   getBaseDiscriminator(unsigned Discriminator,
63                        bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
64     return UseFSD ? Discriminator
65                   : DILocation::getBaseDiscriminatorFromDiscriminator(
66                         Discriminator, /* IsFSDiscriminator */ false);
67   }
68 
69   static bool UseFSDiscriminator;
70 
71 protected:
72   // Use SampleProfileWriter to serialize profile map
73   void write(std::unique_ptr<SampleProfileWriter> Writer,
74              SampleProfileMap &ProfileMap);
75   /*
76   For each region boundary point, mark if it is begin or end (or both) of
77   the region. Boundary points are inclusive. Log the sample count as well
78   so we can use it when we compute the sample count of each disjoint region
79   later. Note that there might be multiple ranges with different sample
80   count that share same begin/end point. We need to accumulate the sample
81   count for the boundary point for such case, because for the example
82   below,
83 
84   |<--100-->|
85   |<------200------>|
86   A         B       C
87 
88   sample count for disjoint region [A,B] would be 300.
89   */
90   void findDisjointRanges(RangeSample &DisjointRanges,
91                           const RangeSample &Ranges);
92 
93   // Go through each address from range to extract the top frame probe by
94   // looking up in the Address2ProbeMap
95   void extractProbesFromRange(const RangeSample &RangeCounter,
96                               ProbeCounterMap &ProbeCounter,
97                               bool FindDisjointRanges = true);
98 
99   // Helper function for updating body sample for a leaf location in
100   // FunctionProfile
101   void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
102                                            const SampleContextFrame &LeafLoc,
103                                            uint64_t Count);
104 
105   void updateFunctionSamples();
106 
107   void updateTotalSamples();
108 
109   void updateCallsiteSamples();
110 
111   StringRef getCalleeNameForAddress(uint64_t TargetAddress);
112 
113   void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
114 
115   void calculateAndShowDensity(const SampleProfileMap &Profiles);
116 
117   double calculateDensity(const SampleProfileMap &Profiles,
118                           uint64_t HotCntThreshold);
119 
120   void showDensitySuggestion(double Density);
121 
122   void collectProfiledFunctions();
123 
124   bool collectFunctionsFromRawProfile(
125       std::unordered_set<const BinaryFunction *> &ProfiledFunctions);
126 
127   // Collect profiled Functions for llvm sample profile input.
128   virtual bool collectFunctionsFromLLVMProfile(
129       std::unordered_set<const BinaryFunction *> &ProfiledFunctions) = 0;
130 
131   // Thresholds from profile summary to answer isHotCount/isColdCount queries.
132   uint64_t HotCountThreshold;
133 
134   uint64_t ColdCountThreshold;
135 
136   ProfiledBinary *Binary = nullptr;
137 
138   std::unique_ptr<ProfileSummary> Summary;
139 
140   // Used by SampleProfileWriter
141   SampleProfileMap ProfileMap;
142 
143   const ContextSampleCounterMap *SampleCounters = nullptr;
144 };
145 
146 class ProfileGenerator : public ProfileGeneratorBase {
147 
148 public:
ProfileGenerator(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)149   ProfileGenerator(ProfiledBinary *Binary,
150                    const ContextSampleCounterMap *Counters)
151       : ProfileGeneratorBase(Binary, Counters){};
ProfileGenerator(ProfiledBinary * Binary,const SampleProfileMap && Profiles)152   ProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles)
153       : ProfileGeneratorBase(Binary, std::move(Profiles)){};
154   void generateProfile() override;
155 
156 private:
157   void generateLineNumBasedProfile();
158   void generateProbeBasedProfile();
159   RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
160   FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName);
161   // Helper function to get the leaf frame's FunctionProfile by traversing the
162   // inline stack and meanwhile it adds the total samples for each frame's
163   // function profile.
164   FunctionSamples &
165   getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
166                                    uint64_t Count);
167   void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
168   void
169   populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
170   void
171   populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
172   void populateBoundarySamplesWithProbesForAllFunctions(
173       const BranchSample &BranchCounters);
174   void postProcessProfiles();
175   void trimColdProfiles(const SampleProfileMap &Profiles,
176                         uint64_t ColdCntThreshold);
177   bool collectFunctionsFromLLVMProfile(
178       std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
179 };
180 
181 class CSProfileGenerator : public ProfileGeneratorBase {
182 public:
CSProfileGenerator(ProfiledBinary * Binary,const ContextSampleCounterMap * Counters)183   CSProfileGenerator(ProfiledBinary *Binary,
184                      const ContextSampleCounterMap *Counters)
185       : ProfileGeneratorBase(Binary, Counters){};
CSProfileGenerator(ProfiledBinary * Binary,SampleProfileMap & Profiles)186   CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &Profiles)
187       : ProfileGeneratorBase(Binary), ContextTracker(Profiles, nullptr){};
188   void generateProfile() override;
189 
190   // Trim the context stack at a given depth.
191   template <typename T>
192   static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
193     if (Depth < 0 || static_cast<size_t>(Depth) >= S.size())
194       return;
195     std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(),
196               S.begin());
197     S.resize(Depth);
198   }
199 
200   // Remove adjacent repeated context sequences up to a given sequence length,
201   // -1 means no size limit. Note that repeated sequences are identified based
202   // on the exact call site, this is finer granularity than function recursion.
203   template <typename T>
204   static void compressRecursionContext(SmallVectorImpl<T> &Context,
205                                        int32_t CSize = MaxCompressionSize) {
206     uint32_t I = 1;
207     uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
208     uint32_t MaxDedupSize =
209         CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
210     auto BeginIter = Context.begin();
211     // Use an in-place algorithm to save memory copy
212     // End indicates the end location of current iteration's data
213     uint32_t End = 0;
214     // Deduplicate from length 1 to the max possible size of a repeated
215     // sequence.
216     while (I <= MaxDedupSize) {
217       // This is a linear algorithm that deduplicates adjacent repeated
218       // sequences of size I. The deduplication detection runs on a sliding
219       // window whose size is 2*I and it keeps sliding the window to deduplicate
220       // the data inside. Once duplication is detected, deduplicate it by
221       // skipping the right half part of the window, otherwise just copy back
222       // the new one by appending them at the back of End pointer(for the next
223       // iteration).
224       //
225       // For example:
226       // Input: [a1, a2, b1, b2]
227       // (Added index to distinguish the same char, the origin is [a, a, b,
228       // b], the size of the dedup window is 2(I = 1) at the beginning)
229       //
230       // 1) The initial status is a dummy window[null, a1], then just copy the
231       // right half of the window(End = 0), then slide the window.
232       // Result: [a1], a2, b1, b2 (End points to the element right before ],
233       // after ] is the data of the previous iteration)
234       //
235       // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
236       // the window i.e the duplication happen. Only slide the window.
237       // Result: [a1], a2, b1, b2
238       //
239       // 3) Next window is [a2, b1], copy the right half of the window(b1 is
240       // new) to the End and slide the window.
241       // Result: [a1, b1], b1, b2
242       //
243       // 4) Next window is [b1, b2], same to 2), skip b2.
244       // Result: [a1, b1], b1, b2
245       // After resize, it will be [a, b]
246 
247       // Use pointers like below to do comparison inside the window
248       //    [a         b         c        a       b        c]
249       //     |         |         |                |        |
250       // LeftBoundary Left     Right           Left+I    Right+I
251       // A duplication found if Left < LeftBoundry.
252 
253       int32_t Right = I - 1;
254       End = I;
255       int32_t LeftBoundary = 0;
256       while (Right + I < Context.size()) {
257         // To avoids scanning a part of a sequence repeatedly, it finds out
258         // the common suffix of two hald in the window. The common suffix will
259         // serve as the common prefix of next possible pair of duplicate
260         // sequences. The non-common part will be ignored and never scanned
261         // again.
262 
263         // For example.
264         // Input: [a, b1], c1, b2, c2
265         // I = 2
266         //
267         // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
268         // part is 'c1', copy it and only slide the window 1 step.
269         // Result: [a, b1, c1], b2, c2
270         //
271         // 2) Next window is [b1, c1, b2, c2], so duplication happen.
272         // Result after resize: [a, b, c]
273 
274         int32_t Left = Right;
275         while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
276           // Find the longest suffix inside the window. When stops, Left points
277           // at the diverging point in the current sequence.
278           Left--;
279         }
280 
281         bool DuplicationFound = (Left < LeftBoundary);
282         // Don't need to recheck the data before Right
283         LeftBoundary = Right + 1;
284         if (DuplicationFound) {
285           // Duplication found, skip right half of the window.
286           Right += I;
287         } else {
288           // Copy the non-common-suffix part of the adjacent sequence.
289           std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
290                     BeginIter + End);
291           End += Left + I - Right;
292           // Only slide the window by the size of non-common-suffix
293           Right = Left + I;
294         }
295       }
296       // Don't forget the remaining part that's not scanned.
297       std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
298       End += Context.size() - Right - 1;
299       I++;
300       Context.resize(End);
301       MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
302     }
303   }
304 
305 private:
306   void generateLineNumBasedProfile();
307 
308   FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
309                                               bool WasLeafInlined = false);
310 
311   // Lookup or create ContextTrieNode for the context, FunctionSamples is
312   // created inside this function.
313   ContextTrieNode *getOrCreateContextNode(const SampleContextFrames Context,
314                                           bool WasLeafInlined = false);
315 
316   // For profiled only functions, on-demand compute their inline context
317   // function byte size which is used by the pre-inliner.
318   void computeSizeForProfiledFunctions();
319   // Post processing for profiles before writing out, such as mermining
320   // and trimming cold profiles, running preinliner on profiles.
321   void postProcessProfiles();
322 
323   void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
324                                       const RangeSample &RangeCounters);
325 
326   void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode,
327                                           const BranchSample &BranchCounters);
328 
329   void populateInferredFunctionSamples(ContextTrieNode &Node);
330 
331   void updateFunctionSamples();
332 
333   void generateProbeBasedProfile();
334 
335   // Fill in function body samples from probes
336   void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
337                                      const AddrBasedCtxKey *CtxKey);
338   // Fill in boundary samples for a call probe
339   void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
340                                          const AddrBasedCtxKey *CtxKey);
341 
342   ContextTrieNode *
343   getContextNodeForLeafProbe(const AddrBasedCtxKey *CtxKey,
344                              const MCDecodedPseudoProbe *LeafProbe);
345 
346   // Helper function to get FunctionSamples for the leaf probe
347   FunctionSamples &
348   getFunctionProfileForLeafProbe(const AddrBasedCtxKey *CtxKey,
349                                  const MCDecodedPseudoProbe *LeafProbe);
350 
351   void convertToProfileMap(ContextTrieNode &Node,
352                            SampleContextFrameVector &Context);
353 
354   void convertToProfileMap();
355 
356   void computeSummaryAndThreshold();
357 
358   bool collectFunctionsFromLLVMProfile(
359       std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
360 
361   void initializeMissingFrameInferrer();
362 
363   // Given an input `Context`, output `NewContext` with inferred missing tail
364   // call frames.
365   void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context,
366                           SmallVectorImpl<uint64_t> &NewContext);
367 
getRootContext()368   ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); };
369 
370   // The container for holding the FunctionSamples used by context trie.
371   std::list<FunctionSamples> FSamplesList;
372 
373   // Underlying context table serves for sample profile writer.
374   std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
375 
376   SampleContextTracker ContextTracker;
377 
378   bool IsProfileValidOnTrie = true;
379 
380 public:
381   // Deduplicate adjacent repeated context sequences up to a given sequence
382   // length. -1 means no size limit.
383   static int32_t MaxCompressionSize;
384   static int MaxContextDepth;
385 };
386 
387 } // end namespace sampleprof
388 } // end namespace llvm
389 
390 #endif
391