1*da58b97aSjoerg //===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
2*da58b97aSjoerg //
3*da58b97aSjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*da58b97aSjoerg // See https://llvm.org/LICENSE.txt for license information.
5*da58b97aSjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*da58b97aSjoerg //
7*da58b97aSjoerg //===----------------------------------------------------------------------===//
8*da58b97aSjoerg 
9*da58b97aSjoerg #ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
10*da58b97aSjoerg #define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
11*da58b97aSjoerg #include "CSPreInliner.h"
12*da58b97aSjoerg #include "ErrorHandling.h"
13*da58b97aSjoerg #include "PerfReader.h"
14*da58b97aSjoerg #include "ProfiledBinary.h"
15*da58b97aSjoerg #include "llvm/ProfileData/SampleProfWriter.h"
16*da58b97aSjoerg #include <memory>
17*da58b97aSjoerg #include <unordered_set>
18*da58b97aSjoerg 
19*da58b97aSjoerg using namespace llvm;
20*da58b97aSjoerg using namespace sampleprof;
21*da58b97aSjoerg 
22*da58b97aSjoerg namespace llvm {
23*da58b97aSjoerg namespace sampleprof {
24*da58b97aSjoerg 
25*da58b97aSjoerg class ProfileGenerator {
26*da58b97aSjoerg 
27*da58b97aSjoerg public:
ProfileGenerator()28*da58b97aSjoerg   ProfileGenerator(){};
29*da58b97aSjoerg   virtual ~ProfileGenerator() = default;
30*da58b97aSjoerg   static std::unique_ptr<ProfileGenerator>
31*da58b97aSjoerg   create(const BinarySampleCounterMap &BinarySampleCounters,
32*da58b97aSjoerg          enum PerfScriptType SampleType);
33*da58b97aSjoerg   virtual void generateProfile() = 0;
34*da58b97aSjoerg   // Use SampleProfileWriter to serialize profile map
35*da58b97aSjoerg   virtual void write(std::unique_ptr<SampleProfileWriter> Writer,
36*da58b97aSjoerg                      StringMap<FunctionSamples> &ProfileMap);
37*da58b97aSjoerg   void write();
38*da58b97aSjoerg 
39*da58b97aSjoerg protected:
40*da58b97aSjoerg   /*
41*da58b97aSjoerg   For each region boundary point, mark if it is begin or end (or both) of
42*da58b97aSjoerg   the region. Boundary points are inclusive. Log the sample count as well
43*da58b97aSjoerg   so we can use it when we compute the sample count of each disjoint region
44*da58b97aSjoerg   later. Note that there might be multiple ranges with different sample
45*da58b97aSjoerg   count that share same begin/end point. We need to accumulate the sample
46*da58b97aSjoerg   count for the boundary point for such case, because for the example
47*da58b97aSjoerg   below,
48*da58b97aSjoerg 
49*da58b97aSjoerg   |<--100-->|
50*da58b97aSjoerg   |<------200------>|
51*da58b97aSjoerg   A         B       C
52*da58b97aSjoerg 
53*da58b97aSjoerg   sample count for disjoint region [A,B] would be 300.
54*da58b97aSjoerg   */
55*da58b97aSjoerg   void findDisjointRanges(RangeSample &DisjointRanges,
56*da58b97aSjoerg                           const RangeSample &Ranges);
57*da58b97aSjoerg 
58*da58b97aSjoerg   // Used by SampleProfileWriter
59*da58b97aSjoerg   StringMap<FunctionSamples> ProfileMap;
60*da58b97aSjoerg };
61*da58b97aSjoerg 
62*da58b97aSjoerg class CSProfileGenerator : public ProfileGenerator {
63*da58b97aSjoerg protected:
64*da58b97aSjoerg   const BinarySampleCounterMap &BinarySampleCounters;
65*da58b97aSjoerg 
66*da58b97aSjoerg public:
CSProfileGenerator(const BinarySampleCounterMap & Counters)67*da58b97aSjoerg   CSProfileGenerator(const BinarySampleCounterMap &Counters)
68*da58b97aSjoerg       : BinarySampleCounters(Counters){};
69*da58b97aSjoerg 
70*da58b97aSjoerg public:
71*da58b97aSjoerg   void generateProfile() override;
72*da58b97aSjoerg 
73*da58b97aSjoerg   // Remove adjacent repeated context sequences up to a given sequence length,
74*da58b97aSjoerg   // -1 means no size limit. Note that repeated sequences are identified based
75*da58b97aSjoerg   // on the exact call site, this is finer granularity than function recursion.
76*da58b97aSjoerg   template <typename T>
77*da58b97aSjoerg   static void compressRecursionContext(SmallVectorImpl<T> &Context,
78*da58b97aSjoerg                                        int32_t CSize = MaxCompressionSize) {
79*da58b97aSjoerg     uint32_t I = 1;
80*da58b97aSjoerg     uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
81*da58b97aSjoerg     uint32_t MaxDedupSize =
82*da58b97aSjoerg         CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
83*da58b97aSjoerg     auto BeginIter = Context.begin();
84*da58b97aSjoerg     // Use an in-place algorithm to save memory copy
85*da58b97aSjoerg     // End indicates the end location of current iteration's data
86*da58b97aSjoerg     uint32_t End = 0;
87*da58b97aSjoerg     // Deduplicate from length 1 to the max possible size of a repeated
88*da58b97aSjoerg     // sequence.
89*da58b97aSjoerg     while (I <= MaxDedupSize) {
90*da58b97aSjoerg       // This is a linear algorithm that deduplicates adjacent repeated
91*da58b97aSjoerg       // sequences of size I. The deduplication detection runs on a sliding
92*da58b97aSjoerg       // window whose size is 2*I and it keeps sliding the window to deduplicate
93*da58b97aSjoerg       // the data inside. Once duplication is detected, deduplicate it by
94*da58b97aSjoerg       // skipping the right half part of the window, otherwise just copy back
95*da58b97aSjoerg       // the new one by appending them at the back of End pointer(for the next
96*da58b97aSjoerg       // iteration).
97*da58b97aSjoerg       //
98*da58b97aSjoerg       // For example:
99*da58b97aSjoerg       // Input: [a1, a2, b1, b2]
100*da58b97aSjoerg       // (Added index to distinguish the same char, the origin is [a, a, b,
101*da58b97aSjoerg       // b], the size of the dedup window is 2(I = 1) at the beginning)
102*da58b97aSjoerg       //
103*da58b97aSjoerg       // 1) The initial status is a dummy window[null, a1], then just copy the
104*da58b97aSjoerg       // right half of the window(End = 0), then slide the window.
105*da58b97aSjoerg       // Result: [a1], a2, b1, b2 (End points to the element right before ],
106*da58b97aSjoerg       // after ] is the data of the previous iteration)
107*da58b97aSjoerg       //
108*da58b97aSjoerg       // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
109*da58b97aSjoerg       // the window i.e the duplication happen. Only slide the window.
110*da58b97aSjoerg       // Result: [a1], a2, b1, b2
111*da58b97aSjoerg       //
112*da58b97aSjoerg       // 3) Next window is [a2, b1], copy the right half of the window(b1 is
113*da58b97aSjoerg       // new) to the End and slide the window.
114*da58b97aSjoerg       // Result: [a1, b1], b1, b2
115*da58b97aSjoerg       //
116*da58b97aSjoerg       // 4) Next window is [b1, b2], same to 2), skip b2.
117*da58b97aSjoerg       // Result: [a1, b1], b1, b2
118*da58b97aSjoerg       // After resize, it will be [a, b]
119*da58b97aSjoerg 
120*da58b97aSjoerg       // Use pointers like below to do comparison inside the window
121*da58b97aSjoerg       //    [a         b         c        a       b        c]
122*da58b97aSjoerg       //     |         |         |                |        |
123*da58b97aSjoerg       // LeftBoundary Left     Right           Left+I    Right+I
124*da58b97aSjoerg       // A duplication found if Left < LeftBoundry.
125*da58b97aSjoerg 
126*da58b97aSjoerg       int32_t Right = I - 1;
127*da58b97aSjoerg       End = I;
128*da58b97aSjoerg       int32_t LeftBoundary = 0;
129*da58b97aSjoerg       while (Right + I < Context.size()) {
130*da58b97aSjoerg         // To avoids scanning a part of a sequence repeatedly, it finds out
131*da58b97aSjoerg         // the common suffix of two hald in the window. The common suffix will
132*da58b97aSjoerg         // serve as the common prefix of next possible pair of duplicate
133*da58b97aSjoerg         // sequences. The non-common part will be ignored and never scanned
134*da58b97aSjoerg         // again.
135*da58b97aSjoerg 
136*da58b97aSjoerg         // For example.
137*da58b97aSjoerg         // Input: [a, b1], c1, b2, c2
138*da58b97aSjoerg         // I = 2
139*da58b97aSjoerg         //
140*da58b97aSjoerg         // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
141*da58b97aSjoerg         // part is 'c1', copy it and only slide the window 1 step.
142*da58b97aSjoerg         // Result: [a, b1, c1], b2, c2
143*da58b97aSjoerg         //
144*da58b97aSjoerg         // 2) Next window is [b1, c1, b2, c2], so duplication happen.
145*da58b97aSjoerg         // Result after resize: [a, b, c]
146*da58b97aSjoerg 
147*da58b97aSjoerg         int32_t Left = Right;
148*da58b97aSjoerg         while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
149*da58b97aSjoerg           // Find the longest suffix inside the window. When stops, Left points
150*da58b97aSjoerg           // at the diverging point in the current sequence.
151*da58b97aSjoerg           Left--;
152*da58b97aSjoerg         }
153*da58b97aSjoerg 
154*da58b97aSjoerg         bool DuplicationFound = (Left < LeftBoundary);
155*da58b97aSjoerg         // Don't need to recheck the data before Right
156*da58b97aSjoerg         LeftBoundary = Right + 1;
157*da58b97aSjoerg         if (DuplicationFound) {
158*da58b97aSjoerg           // Duplication found, skip right half of the window.
159*da58b97aSjoerg           Right += I;
160*da58b97aSjoerg         } else {
161*da58b97aSjoerg           // Copy the non-common-suffix part of the adjacent sequence.
162*da58b97aSjoerg           std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
163*da58b97aSjoerg                     BeginIter + End);
164*da58b97aSjoerg           End += Left + I - Right;
165*da58b97aSjoerg           // Only slide the window by the size of non-common-suffix
166*da58b97aSjoerg           Right = Left + I;
167*da58b97aSjoerg         }
168*da58b97aSjoerg       }
169*da58b97aSjoerg       // Don't forget the remaining part that's not scanned.
170*da58b97aSjoerg       std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
171*da58b97aSjoerg       End += Context.size() - Right - 1;
172*da58b97aSjoerg       I++;
173*da58b97aSjoerg       Context.resize(End);
174*da58b97aSjoerg       MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
175*da58b97aSjoerg     }
176*da58b97aSjoerg   }
177*da58b97aSjoerg 
178*da58b97aSjoerg protected:
179*da58b97aSjoerg   // Lookup or create FunctionSamples for the context
180*da58b97aSjoerg   FunctionSamples &getFunctionProfileForContext(StringRef ContextId,
181*da58b97aSjoerg                                                 bool WasLeafInlined = false);
182*da58b97aSjoerg   // Post processing for profiles before writing out, such as mermining
183*da58b97aSjoerg   // and trimming cold profiles, running preinliner on profiles.
184*da58b97aSjoerg   void postProcessProfiles();
185*da58b97aSjoerg   void computeSummaryAndThreshold();
186*da58b97aSjoerg   void write(std::unique_ptr<SampleProfileWriter> Writer,
187*da58b97aSjoerg              StringMap<FunctionSamples> &ProfileMap) override;
188*da58b97aSjoerg 
189*da58b97aSjoerg   // Thresholds from profile summary to answer isHotCount/isColdCount queries.
190*da58b97aSjoerg   uint64_t HotCountThreshold;
191*da58b97aSjoerg   uint64_t ColdCountThreshold;
192*da58b97aSjoerg 
193*da58b97aSjoerg   // String table owning context strings created from profile generation.
194*da58b97aSjoerg   std::unordered_set<std::string> ContextStrings;
195*da58b97aSjoerg 
196*da58b97aSjoerg private:
197*da58b97aSjoerg   // Helper function for updating body sample for a leaf location in
198*da58b97aSjoerg   // FunctionProfile
199*da58b97aSjoerg   void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
200*da58b97aSjoerg                                            const FrameLocation &LeafLoc,
201*da58b97aSjoerg                                            uint64_t Count);
202*da58b97aSjoerg   void populateFunctionBodySamples(FunctionSamples &FunctionProfile,
203*da58b97aSjoerg                                    const RangeSample &RangeCounters,
204*da58b97aSjoerg                                    ProfiledBinary *Binary);
205*da58b97aSjoerg   void populateFunctionBoundarySamples(StringRef ContextId,
206*da58b97aSjoerg                                        FunctionSamples &FunctionProfile,
207*da58b97aSjoerg                                        const BranchSample &BranchCounters,
208*da58b97aSjoerg                                        ProfiledBinary *Binary);
209*da58b97aSjoerg   void populateInferredFunctionSamples();
210*da58b97aSjoerg 
211*da58b97aSjoerg public:
212*da58b97aSjoerg   // Deduplicate adjacent repeated context sequences up to a given sequence
213*da58b97aSjoerg   // length. -1 means no size limit.
214*da58b97aSjoerg   static int32_t MaxCompressionSize;
215*da58b97aSjoerg };
216*da58b97aSjoerg 
217*da58b97aSjoerg using ProbeCounterMap = std::unordered_map<const PseudoProbe *, uint64_t>;
218*da58b97aSjoerg 
219*da58b97aSjoerg class PseudoProbeCSProfileGenerator : public CSProfileGenerator {
220*da58b97aSjoerg 
221*da58b97aSjoerg public:
PseudoProbeCSProfileGenerator(const BinarySampleCounterMap & Counters)222*da58b97aSjoerg   PseudoProbeCSProfileGenerator(const BinarySampleCounterMap &Counters)
223*da58b97aSjoerg       : CSProfileGenerator(Counters) {}
224*da58b97aSjoerg   void generateProfile() override;
225*da58b97aSjoerg 
226*da58b97aSjoerg private:
227*da58b97aSjoerg   // Go through each address from range to extract the top frame probe by
228*da58b97aSjoerg   // looking up in the Address2ProbeMap
229*da58b97aSjoerg   void extractProbesFromRange(const RangeSample &RangeCounter,
230*da58b97aSjoerg                               ProbeCounterMap &ProbeCounter,
231*da58b97aSjoerg                               ProfiledBinary *Binary);
232*da58b97aSjoerg   // Fill in function body samples from probes
233*da58b97aSjoerg   void
234*da58b97aSjoerg   populateBodySamplesWithProbes(const RangeSample &RangeCounter,
235*da58b97aSjoerg                                 SmallVectorImpl<std::string> &ContextStrStack,
236*da58b97aSjoerg                                 ProfiledBinary *Binary);
237*da58b97aSjoerg   // Fill in boundary samples for a call probe
238*da58b97aSjoerg   void populateBoundarySamplesWithProbes(
239*da58b97aSjoerg       const BranchSample &BranchCounter,
240*da58b97aSjoerg       SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary);
241*da58b97aSjoerg   // Helper function to get FunctionSamples for the leaf inlined context
242*da58b97aSjoerg   FunctionSamples &
243*da58b97aSjoerg   getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
244*da58b97aSjoerg                                  const PseudoProbeFuncDesc *LeafFuncDesc,
245*da58b97aSjoerg                                  bool WasLeafInlined);
246*da58b97aSjoerg   // Helper function to get FunctionSamples for the leaf probe
247*da58b97aSjoerg   FunctionSamples &
248*da58b97aSjoerg   getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
249*da58b97aSjoerg                                  const PseudoProbe *LeafProbe,
250*da58b97aSjoerg                                  ProfiledBinary *Binary);
251*da58b97aSjoerg };
252*da58b97aSjoerg 
253*da58b97aSjoerg } // end namespace sampleprof
254*da58b97aSjoerg } // end namespace llvm
255*da58b97aSjoerg 
256*da58b97aSjoerg #endif
257