1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SIMPLE_PERF_SAMPLE_TREE_H_
18 #define SIMPLE_PERF_SAMPLE_TREE_H_
19 
20 #include <unordered_map>
21 
22 #include "callchain.h"
23 #include "OfflineUnwinder.h"
24 #include "perf_regs.h"
25 #include "record.h"
26 #include "SampleComparator.h"
27 #include "SampleDisplayer.h"
28 #include "thread_tree.h"
29 
30 using namespace simpleperf;
31 
32 // A SampleTree is a collection of samples. A profiling report is mainly about
33 // constructing a SampleTree and display it. There are three steps involved:
34 // build the tree, sort the tree, and display it. For example, if we want to
35 // show how many cpu-cycles are spent in different functions, we should do as
36 // follows:
37 // 1. Build a SampleTree from SampleRecords with each sample containing
38 //    (cpu-cycles, function name). When building the tree, we should merge
39 //    samples containing the same function name.
40 // 2. Sort the SampleTree by cpu-cycles in the sample. As we want to display the
41 //    samples in a decreasing order of cpu-cycles, we should sort it like this.
42 // 3. Display the SampleTree, each sample prints its (cpu-cycles, function name)
43 //    pair.
44 //
45 // We represent the three steps with three template classes.
46 // 1. A SampleTree is built by SampleTreeBuilder. The comparator passed in
47 //    SampleTreeBuilder's constructor decides the property of samples should be
48 //    merged together.
49 // 2. After a SampleTree is built and got from SampleTreeBuilder, it should be
50 //    sorted by SampleTreeSorter. The sort result decides the order to show
51 //    samples.
52 // 3. At last, the sorted SampleTree is passed to SampleTreeDisplayer, which
53 //    displays each sample in the SampleTree.
54 
55 template <typename EntryT, typename AccumulateInfoT>
56 class SampleTreeBuilder {
57  public:
SampleTreeBuilder(const SampleComparator<EntryT> & comparator)58   explicit SampleTreeBuilder(const SampleComparator<EntryT>& comparator)
59       : sample_set_(comparator),
60         accumulate_callchain_(false),
61         sample_comparator_(comparator),
62         callchain_sample_set_(comparator),
63         use_branch_address_(false),
64         build_callchain_(false),
65         use_caller_as_callchain_root_(false) {}
66 
~SampleTreeBuilder()67   virtual ~SampleTreeBuilder() {}
68 
SetBranchSampleOption(bool use_branch_address)69   void SetBranchSampleOption(bool use_branch_address) {
70     use_branch_address_ = use_branch_address;
71   }
72 
SetCallChainSampleOptions(bool accumulate_callchain,bool build_callchain,bool use_caller_as_callchain_root)73   void SetCallChainSampleOptions(bool accumulate_callchain,
74                                  bool build_callchain,
75                                  bool use_caller_as_callchain_root) {
76     accumulate_callchain_ = accumulate_callchain;
77     build_callchain_ = build_callchain;
78     use_caller_as_callchain_root_ = use_caller_as_callchain_root;
79     if (accumulate_callchain_) {
80       offline_unwinder_ = OfflineUnwinder::Create(false);
81     }
82   }
83 
ProcessSampleRecord(const SampleRecord & r)84   void ProcessSampleRecord(const SampleRecord& r) {
85     if (use_branch_address_ && (r.sample_type & PERF_SAMPLE_BRANCH_STACK)) {
86       for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
87         auto& item = r.branch_stack_data.stack[i];
88         if (item.from != 0 && item.to != 0) {
89           CreateBranchSample(r, item);
90         }
91       }
92       return;
93     }
94     bool in_kernel = r.InKernel();
95     AccumulateInfoT acc_info;
96     EntryT* sample = CreateSample(r, in_kernel, &acc_info);
97     if (sample == nullptr) {
98       return;
99     }
100     if (accumulate_callchain_) {
101       std::vector<uint64_t> ips;
102       if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
103         ips.insert(ips.end(), r.callchain_data.ips,
104                    r.callchain_data.ips + r.callchain_data.ip_nr);
105       }
106       const ThreadEntry* thread = GetThreadOfSample(sample);
107       // Use stack_user_data.data.size() instead of stack_user_data.dyn_size, to
108       // make up for the missing kernel patch in N9. See b/22612370.
109       if (thread != nullptr && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
110           (r.regs_user_data.reg_mask != 0) &&
111           (r.sample_type & PERF_SAMPLE_STACK_USER) &&
112           (r.GetValidStackSize() > 0)) {
113         RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
114         std::vector<uint64_t> user_ips;
115         std::vector<uint64_t> sps;
116         if (offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
117                                                r.GetValidStackSize(), &user_ips, &sps)) {
118           ips.push_back(PERF_CONTEXT_USER);
119           ips.insert(ips.end(), user_ips.begin(), user_ips.end());
120         }
121       }
122 
123       std::vector<EntryT*> callchain;
124       callchain.push_back(sample);
125 
126       bool first_ip = true;
127       for (auto& ip : ips) {
128         if (ip >= PERF_CONTEXT_MAX) {
129           switch (ip) {
130             case PERF_CONTEXT_KERNEL:
131               in_kernel = true;
132               break;
133             case PERF_CONTEXT_USER:
134               in_kernel = false;
135               break;
136             default:
137               LOG(DEBUG) << "Unexpected perf_context in callchain: " << ip;
138           }
139         } else {
140           if (first_ip) {
141             first_ip = false;
142             // Remove duplication with sampled ip.
143             if (ip == r.ip_data.ip) {
144               continue;
145             }
146           }
147           EntryT* callchain_sample =
148               CreateCallChainSample(thread, sample, ip, in_kernel, callchain, acc_info);
149           if (callchain_sample == nullptr) {
150             break;
151           }
152           callchain.push_back(callchain_sample);
153         }
154       }
155 
156       if (build_callchain_) {
157         std::set<EntryT*> added_set;
158         if (use_caller_as_callchain_root_) {
159           std::reverse(callchain.begin(), callchain.end());
160         }
161         EntryT* parent = nullptr;
162         while (callchain.size() >= 2) {
163           EntryT* sample = callchain[0];
164           callchain.erase(callchain.begin());
165           // Add only once for recursive calls on callchain.
166           if (added_set.find(sample) != added_set.end()) {
167             continue;
168           }
169           added_set.insert(sample);
170           InsertCallChainForSample(sample, callchain, acc_info);
171           UpdateCallChainParentInfo(sample, parent);
172           parent = sample;
173         }
174       }
175     }
176   }
177 
GetSamples()178   std::vector<EntryT*> GetSamples() const {
179     std::vector<EntryT*> result;
180     for (auto& entry : sample_set_) {
181       result.push_back(entry);
182     }
183     return result;
184   }
185 
186  protected:
187   virtual EntryT* CreateSample(const SampleRecord& r, bool in_kernel,
188                                AccumulateInfoT* acc_info) = 0;
189   virtual EntryT* CreateBranchSample(const SampleRecord& r,
190                                      const BranchStackItemType& item) = 0;
191   virtual EntryT* CreateCallChainSample(const ThreadEntry* thread, const EntryT* sample,
192                                         uint64_t ip, bool in_kernel,
193                                         const std::vector<EntryT*>& callchain,
194                                         const AccumulateInfoT& acc_info) = 0;
195   virtual const ThreadEntry* GetThreadOfSample(EntryT*) = 0;
196   virtual uint64_t GetPeriodForCallChain(const AccumulateInfoT& acc_info) = 0;
FilterSample(const EntryT *)197   virtual bool FilterSample(const EntryT*) { return true; }
198 
UpdateSummary(const EntryT *)199   virtual void UpdateSummary(const EntryT*) {}
200 
201   virtual void MergeSample(EntryT* sample1, EntryT* sample2) = 0;
202 
InsertSample(std::unique_ptr<EntryT> sample)203   EntryT* InsertSample(std::unique_ptr<EntryT> sample) {
204     if (sample == nullptr || !FilterSample(sample.get())) {
205       return nullptr;
206     }
207     UpdateSummary(sample.get());
208     EntryT* result;
209     auto it = sample_set_.find(sample.get());
210     if (it == sample_set_.end()) {
211       result = sample.get();
212       sample_set_.insert(sample.get());
213       sample_storage_.push_back(std::move(sample));
214     } else {
215       result = *it;
216       MergeSample(*it, sample.get());
217     }
218     return result;
219   }
220 
InsertCallChainSample(std::unique_ptr<EntryT> sample,const std::vector<EntryT * > & callchain)221   EntryT* InsertCallChainSample(std::unique_ptr<EntryT> sample,
222                                 const std::vector<EntryT*>& callchain) {
223     if (sample == nullptr) {
224       return nullptr;
225     }
226     if (!FilterSample(sample.get())) {
227       // Store in callchain_sample_set_ for use in other EntryT's callchain.
228       auto it = callchain_sample_set_.find(sample.get());
229       if (it != callchain_sample_set_.end()) {
230         return *it;
231       }
232       EntryT* result = sample.get();
233       callchain_sample_set_.insert(sample.get());
234       sample_storage_.push_back(std::move(sample));
235       return result;
236     }
237 
238     auto it = sample_set_.find(sample.get());
239     if (it != sample_set_.end()) {
240       EntryT* sample = *it;
241       // Process only once for recursive function call.
242       if (std::find(callchain.begin(), callchain.end(), sample) !=
243           callchain.end()) {
244         return sample;
245       }
246     }
247     return InsertSample(std::move(sample));
248   }
249 
InsertCallChainForSample(EntryT * sample,const std::vector<EntryT * > & callchain,const AccumulateInfoT & acc_info)250   void InsertCallChainForSample(EntryT* sample,
251                                 const std::vector<EntryT*>& callchain,
252                                 const AccumulateInfoT& acc_info) {
253     uint64_t period = GetPeriodForCallChain(acc_info);
254     sample->callchain.AddCallChain(
255         callchain, period, [&](const EntryT* s1, const EntryT* s2) {
256           return sample_comparator_.IsSameSample(s1, s2);
257         });
258   }
259 
AddCallChainDuplicateInfo()260   void AddCallChainDuplicateInfo() {
261     if (build_callchain_) {
262       for (EntryT* sample : sample_set_) {
263         auto it = callchain_parent_map_.find(sample);
264         if (it != callchain_parent_map_.end() && !it->second.has_multiple_parents) {
265           sample->callchain.duplicated = true;
266         }
267       }
268     }
269   }
270 
271   std::set<EntryT*, SampleComparator<EntryT>> sample_set_;
272   bool accumulate_callchain_;
273 
274  private:
UpdateCallChainParentInfo(EntryT * sample,EntryT * parent)275   void UpdateCallChainParentInfo(EntryT* sample, EntryT* parent) {
276     if (parent == nullptr) {
277       return;
278     }
279     auto it = callchain_parent_map_.find(sample);
280     if (it == callchain_parent_map_.end()) {
281       CallChainParentInfo info;
282       info.parent = parent;
283       info.has_multiple_parents = false;
284       callchain_parent_map_[sample] = info;
285     } else if (it->second.parent != parent) {
286       it->second.has_multiple_parents = true;
287     }
288   }
289 
290   const SampleComparator<EntryT> sample_comparator_;
291   // If a CallChainSample is filtered out, it is stored in callchain_sample_set_
292   // and only used in other EntryT's callchain.
293   std::set<EntryT*, SampleComparator<EntryT>> callchain_sample_set_;
294   std::vector<std::unique_ptr<EntryT>> sample_storage_;
295 
296   struct CallChainParentInfo {
297     EntryT* parent;
298     bool has_multiple_parents;
299   };
300   std::unordered_map<EntryT*, CallChainParentInfo> callchain_parent_map_;
301 
302   bool use_branch_address_;
303   bool build_callchain_;
304   bool use_caller_as_callchain_root_;
305   std::unique_ptr<OfflineUnwinder> offline_unwinder_;
306 };
307 
308 template <typename EntryT>
309 class SampleTreeSorter {
310  public:
SampleTreeSorter(SampleComparator<EntryT> comparator)311   explicit SampleTreeSorter(SampleComparator<EntryT> comparator)
312       : comparator_(comparator) {}
313 
~SampleTreeSorter()314   virtual ~SampleTreeSorter() {}
315 
Sort(std::vector<EntryT * > & v,bool sort_callchain)316   void Sort(std::vector<EntryT*>& v, bool sort_callchain) {
317     if (sort_callchain) {
318       for (auto& sample : v) {
319         SortCallChain(sample);
320       }
321     }
322     if (!comparator_.empty()) {
323       std::sort(v.begin(), v.end(), [this](const EntryT* s1, const EntryT* s2) {
324         return comparator_(s1, s2);
325       });
326     }
327   }
328 
329  protected:
SortCallChain(EntryT * sample)330   void SortCallChain(EntryT* sample) { sample->callchain.SortByPeriod(); }
331 
332  private:
333   SampleComparator<EntryT> comparator_;
334 };
335 
336 template <typename EntryT, typename InfoT>
337 class SampleTreeDisplayer {
338  public:
SampleTreeDisplayer(SampleDisplayer<EntryT,InfoT> displayer)339   explicit SampleTreeDisplayer(SampleDisplayer<EntryT, InfoT> displayer)
340       : displayer_(displayer) {}
341 
~SampleTreeDisplayer()342   virtual ~SampleTreeDisplayer() {}
343 
DisplaySamples(FILE * fp,const std::vector<EntryT * > & samples,const InfoT * info)344   void DisplaySamples(FILE* fp, const std::vector<EntryT*>& samples,
345                       const InfoT* info) {
346     displayer_.SetInfo(info);
347     for (const auto& sample : samples) {
348       displayer_.AdjustWidth(sample);
349     }
350     displayer_.PrintNames(fp);
351     for (const auto& sample : samples) {
352       displayer_.PrintSample(fp, sample);
353     }
354   }
355 
356  private:
357   SampleDisplayer<EntryT, InfoT> displayer_;
358 };
359 
360 #endif  // SIMPLE_PERF_SAMPLE_TREE_H_
361