1 //===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
10 #define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
11 #include "CSPreInliner.h"
12 #include "ErrorHandling.h"
13 #include "PerfReader.h"
14 #include "ProfiledBinary.h"
15 #include "llvm/ProfileData/SampleProfWriter.h"
16 #include <memory>
17 #include <unordered_set>
18 
19 using namespace llvm;
20 using namespace sampleprof;
21 
22 namespace llvm {
23 namespace sampleprof {
24 
25 class ProfileGenerator {
26 
27 public:
ProfileGenerator()28   ProfileGenerator(){};
29   virtual ~ProfileGenerator() = default;
30   static std::unique_ptr<ProfileGenerator>
31   create(const BinarySampleCounterMap &BinarySampleCounters,
32          enum PerfScriptType SampleType);
33   virtual void generateProfile() = 0;
34   // Use SampleProfileWriter to serialize profile map
35   virtual void write(std::unique_ptr<SampleProfileWriter> Writer,
36                      StringMap<FunctionSamples> &ProfileMap);
37   void write();
38 
39 protected:
40   /*
41   For each region boundary point, mark if it is begin or end (or both) of
42   the region. Boundary points are inclusive. Log the sample count as well
43   so we can use it when we compute the sample count of each disjoint region
44   later. Note that there might be multiple ranges with different sample
45   count that share same begin/end point. We need to accumulate the sample
46   count for the boundary point for such case, because for the example
47   below,
48 
49   |<--100-->|
50   |<------200------>|
51   A         B       C
52 
53   sample count for disjoint region [A,B] would be 300.
54   */
55   void findDisjointRanges(RangeSample &DisjointRanges,
56                           const RangeSample &Ranges);
57 
58   // Used by SampleProfileWriter
59   StringMap<FunctionSamples> ProfileMap;
60 };
61 
62 class CSProfileGenerator : public ProfileGenerator {
63 protected:
64   const BinarySampleCounterMap &BinarySampleCounters;
65 
66 public:
CSProfileGenerator(const BinarySampleCounterMap & Counters)67   CSProfileGenerator(const BinarySampleCounterMap &Counters)
68       : BinarySampleCounters(Counters){};
69 
70 public:
71   void generateProfile() override;
72 
73   // Remove adjacent repeated context sequences up to a given sequence length,
74   // -1 means no size limit. Note that repeated sequences are identified based
75   // on the exact call site, this is finer granularity than function recursion.
76   template <typename T>
77   static void compressRecursionContext(SmallVectorImpl<T> &Context,
78                                        int32_t CSize = MaxCompressionSize) {
79     uint32_t I = 1;
80     uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
81     uint32_t MaxDedupSize =
82         CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
83     auto BeginIter = Context.begin();
84     // Use an in-place algorithm to save memory copy
85     // End indicates the end location of current iteration's data
86     uint32_t End = 0;
87     // Deduplicate from length 1 to the max possible size of a repeated
88     // sequence.
89     while (I <= MaxDedupSize) {
90       // This is a linear algorithm that deduplicates adjacent repeated
91       // sequences of size I. The deduplication detection runs on a sliding
92       // window whose size is 2*I and it keeps sliding the window to deduplicate
93       // the data inside. Once duplication is detected, deduplicate it by
94       // skipping the right half part of the window, otherwise just copy back
95       // the new one by appending them at the back of End pointer(for the next
96       // iteration).
97       //
98       // For example:
99       // Input: [a1, a2, b1, b2]
100       // (Added index to distinguish the same char, the origin is [a, a, b,
101       // b], the size of the dedup window is 2(I = 1) at the beginning)
102       //
103       // 1) The initial status is a dummy window[null, a1], then just copy the
104       // right half of the window(End = 0), then slide the window.
105       // Result: [a1], a2, b1, b2 (End points to the element right before ],
106       // after ] is the data of the previous iteration)
107       //
108       // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
109       // the window i.e the duplication happen. Only slide the window.
110       // Result: [a1], a2, b1, b2
111       //
112       // 3) Next window is [a2, b1], copy the right half of the window(b1 is
113       // new) to the End and slide the window.
114       // Result: [a1, b1], b1, b2
115       //
116       // 4) Next window is [b1, b2], same to 2), skip b2.
117       // Result: [a1, b1], b1, b2
118       // After resize, it will be [a, b]
119 
120       // Use pointers like below to do comparison inside the window
121       //    [a         b         c        a       b        c]
122       //     |         |         |                |        |
123       // LeftBoundary Left     Right           Left+I    Right+I
124       // A duplication found if Left < LeftBoundry.
125 
126       int32_t Right = I - 1;
127       End = I;
128       int32_t LeftBoundary = 0;
129       while (Right + I < Context.size()) {
130         // To avoids scanning a part of a sequence repeatedly, it finds out
131         // the common suffix of two hald in the window. The common suffix will
132         // serve as the common prefix of next possible pair of duplicate
133         // sequences. The non-common part will be ignored and never scanned
134         // again.
135 
136         // For example.
137         // Input: [a, b1], c1, b2, c2
138         // I = 2
139         //
140         // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
141         // part is 'c1', copy it and only slide the window 1 step.
142         // Result: [a, b1, c1], b2, c2
143         //
144         // 2) Next window is [b1, c1, b2, c2], so duplication happen.
145         // Result after resize: [a, b, c]
146 
147         int32_t Left = Right;
148         while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
149           // Find the longest suffix inside the window. When stops, Left points
150           // at the diverging point in the current sequence.
151           Left--;
152         }
153 
154         bool DuplicationFound = (Left < LeftBoundary);
155         // Don't need to recheck the data before Right
156         LeftBoundary = Right + 1;
157         if (DuplicationFound) {
158           // Duplication found, skip right half of the window.
159           Right += I;
160         } else {
161           // Copy the non-common-suffix part of the adjacent sequence.
162           std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
163                     BeginIter + End);
164           End += Left + I - Right;
165           // Only slide the window by the size of non-common-suffix
166           Right = Left + I;
167         }
168       }
169       // Don't forget the remaining part that's not scanned.
170       std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
171       End += Context.size() - Right - 1;
172       I++;
173       Context.resize(End);
174       MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
175     }
176   }
177 
178 protected:
179   // Lookup or create FunctionSamples for the context
180   FunctionSamples &getFunctionProfileForContext(StringRef ContextId,
181                                                 bool WasLeafInlined = false);
182   // Post processing for profiles before writing out, such as mermining
183   // and trimming cold profiles, running preinliner on profiles.
184   void postProcessProfiles();
185   void computeSummaryAndThreshold();
186   void write(std::unique_ptr<SampleProfileWriter> Writer,
187              StringMap<FunctionSamples> &ProfileMap) override;
188 
189   // Thresholds from profile summary to answer isHotCount/isColdCount queries.
190   uint64_t HotCountThreshold;
191   uint64_t ColdCountThreshold;
192 
193   // String table owning context strings created from profile generation.
194   std::unordered_set<std::string> ContextStrings;
195 
196 private:
197   // Helper function for updating body sample for a leaf location in
198   // FunctionProfile
199   void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
200                                            const FrameLocation &LeafLoc,
201                                            uint64_t Count);
202   void populateFunctionBodySamples(FunctionSamples &FunctionProfile,
203                                    const RangeSample &RangeCounters,
204                                    ProfiledBinary *Binary);
205   void populateFunctionBoundarySamples(StringRef ContextId,
206                                        FunctionSamples &FunctionProfile,
207                                        const BranchSample &BranchCounters,
208                                        ProfiledBinary *Binary);
209   void populateInferredFunctionSamples();
210 
211 public:
212   // Deduplicate adjacent repeated context sequences up to a given sequence
213   // length. -1 means no size limit.
214   static int32_t MaxCompressionSize;
215 };
216 
217 using ProbeCounterMap = std::unordered_map<const PseudoProbe *, uint64_t>;
218 
219 class PseudoProbeCSProfileGenerator : public CSProfileGenerator {
220 
221 public:
PseudoProbeCSProfileGenerator(const BinarySampleCounterMap & Counters)222   PseudoProbeCSProfileGenerator(const BinarySampleCounterMap &Counters)
223       : CSProfileGenerator(Counters) {}
224   void generateProfile() override;
225 
226 private:
227   // Go through each address from range to extract the top frame probe by
228   // looking up in the Address2ProbeMap
229   void extractProbesFromRange(const RangeSample &RangeCounter,
230                               ProbeCounterMap &ProbeCounter,
231                               ProfiledBinary *Binary);
232   // Fill in function body samples from probes
233   void
234   populateBodySamplesWithProbes(const RangeSample &RangeCounter,
235                                 SmallVectorImpl<std::string> &ContextStrStack,
236                                 ProfiledBinary *Binary);
237   // Fill in boundary samples for a call probe
238   void populateBoundarySamplesWithProbes(
239       const BranchSample &BranchCounter,
240       SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary);
241   // Helper function to get FunctionSamples for the leaf inlined context
242   FunctionSamples &
243   getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
244                                  const PseudoProbeFuncDesc *LeafFuncDesc,
245                                  bool WasLeafInlined);
246   // Helper function to get FunctionSamples for the leaf probe
247   FunctionSamples &
248   getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
249                                  const PseudoProbe *LeafProbe,
250                                  ProfiledBinary *Binary);
251 };
252 
253 } // end namespace sampleprof
254 } // end namespace llvm
255 
256 #endif
257