1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SIMPLE_PERF_CALLCHAIN_JOINER_H_
18 #define SIMPLE_PERF_CALLCHAIN_JOINER_H_
19 
20 #include <inttypes.h>
21 #include <stdio.h>
22 #include <unistd.h>
23 
24 #include <unordered_set>
25 #include <vector>
26 
27 namespace simpleperf {
28 
29 namespace call_chain_joiner_impl {
30 
31 // Each node represents a function in a call chain, having a tuple info (tid, ip, sp).
32 // A node remembers its parent in the call chain.
33 struct CacheNode {
34   uint64_t ip;
35   uint64_t sp;
36   uint32_t tid;
37   // Whether the node is at the bottom of a call chain.
38   uint32_t is_leaf : 1;
39   // The index of the parent node in a call chain.
40   uint32_t parent_index : 31;
41   // When is_leaf = false, children_count remembers how many nodes have current node as parent.
42   // When is_leaf = true, leaf_link_prev and leaf_link_next keeps current node in a LRU linked list.
43   union {
44     uint32_t children_count;
45     struct {
46       uint32_t leaf_link_prev;
47       uint32_t leaf_link_next;
48     };
49   };
50 };
51 
52 static_assert(sizeof(CacheNode) == 32, "");
53 
54 struct LRUCacheStat {
55   size_t cache_size = 0u;
56   size_t matched_node_count_to_extend_callchain = 0u;
57   size_t max_node_count = 0u;
58   size_t used_node_count = 0u;
59   size_t recycled_node_count = 0u;
60 };
61 
62 // LRUCache caches call chains in memory, and supports extending a call chain if the (tid, ip, sp)
63 // tuples of its top [matched_node_count_to_extend_callchain] appear in the cache.
64 class LRUCache {
65  public:
66   // cache_size is the bytes of memory we want to use in this cache.
67   // matched_node_count_to_extend_callchain decides how many nodes we need to match to extend a
68   // call chain. Higher value means more strict.
69   LRUCache(size_t cache_size = 8 * 1024 * 1024, size_t matched_node_count_to_extend_callchain = 1);
70   ~LRUCache();
71 
72   // Add a call chain in the cache, and extend it if possible.
73   void AddCallChain(pid_t tid, std::vector<uint64_t>& ips, std::vector<uint64_t>& sps);
74 
Stat()75   const LRUCacheStat& Stat() {
76     return cache_stat_;
77   }
78 
FindNode(uint32_t tid,uint64_t ip,uint64_t sp)79   CacheNode* FindNode(uint32_t tid, uint64_t ip, uint64_t sp) {
80     CacheNode key;
81     key.tid = tid;
82     key.ip = ip;
83     key.sp = sp;
84     auto it = node_set_.find(&key);
85     return it != node_set_.end() ? *it : nullptr;
86   }
87 
88  private:
89   static bool CacheNodeEqual(const CacheNode* n1, const CacheNode* n2);
90   static size_t CacheNodeHash(const CacheNode* n);
91 
92   typedef std::unordered_set<CacheNode*, decltype(&CacheNodeHash), decltype(&CacheNodeEqual)>
93       set_type;
94 
GetParent(CacheNode * node)95   CacheNode* GetParent(CacheNode* node) {
96     return node->parent_index == 0u ? nullptr : nodes_ + node->parent_index;
97   }
98 
GetNodeIndex(CacheNode * node)99   int GetNodeIndex(CacheNode* node) {
100     return node - nodes_;
101   }
102 
RemoveNodeFromLRUList(CacheNode * node)103   void RemoveNodeFromLRUList(CacheNode* node) {
104     CacheNode* prev = &nodes_[node->leaf_link_prev];
105     CacheNode* next = &nodes_[node->leaf_link_next];
106     prev->leaf_link_next = node->leaf_link_next;
107     next->leaf_link_prev = node->leaf_link_prev;
108   }
109 
AppendNodeToLRUList(CacheNode * node)110   void AppendNodeToLRUList(CacheNode* node) {
111     CacheNode* next = &nodes_[0];
112     CacheNode* prev = &nodes_[next->leaf_link_prev];
113     node->leaf_link_next = 0;
114     node->leaf_link_prev = next->leaf_link_prev;
115     next->leaf_link_prev = prev->leaf_link_next = GetNodeIndex(node);
116   }
117 
DecreaseChildCountOfNode(CacheNode * node)118   void DecreaseChildCountOfNode(CacheNode* node) {
119     if (--node->children_count == 0u) {
120       node->is_leaf = true;
121       AppendNodeToLRUList(node);
122     }
123   }
124 
125   CacheNode* GetNode(uint32_t tid, uint64_t ip, uint64_t sp);
126   CacheNode* AllocNode();
127   void LinkParent(CacheNode* child, CacheNode* new_parent);
128   void UnlinkParent(CacheNode* child);
129 
130   CacheNode* nodes_;
131   set_type node_set_;
132   LRUCacheStat cache_stat_;
133 };
134 
135 }  // namespace call_chain_joiner_impl
136 
137 // CallChainJoiner is used to join callchains of samples in the same thread, in order to get
138 // complete call graph. For example, if we have two samples for a thread:
139 //   sample 1: (ip A, sp A) -> (ip B, sp B) -> (ip C, sp C) -> ...
140 //   sample 2:                 (ip B, sp B) -> (ip C, sp C) -> ...
141 // Because we know (ip A, sp A) calls (ip B, sp B) in sample 1, then we can guess the (ip B, sp B)
142 // in sample 2 is also called from (ip A, sp A). So we can add (ip A, sp A) in sample 2 as below.
143 //   sample 2: (ip A, sp A) -> (ip B, sp B) -> (ip C, sp C) -> ...
144 class CallChainJoiner {
145  public:
146   // The parameters are used in LRUCache.
147   CallChainJoiner(size_t cache_size, size_t matched_node_count_to_extend_callchain,
148                   bool keep_original_callchains);
149   ~CallChainJoiner();
150 
151   enum ChainType {
152     ORIGINAL_OFFLINE,
153     ORIGINAL_REMOTE,
154     JOINED_OFFLINE,
155     JOINED_REMOTE,
156   };
157 
158   bool AddCallChain(pid_t pid, pid_t tid, ChainType type, const std::vector<uint64_t>& ips,
159                     const std::vector<uint64_t>& sps);
160   bool JoinCallChains();
161   bool GetNextCallChain(pid_t& pid, pid_t& tid, ChainType& type, std::vector<uint64_t>& ips,
162                         std::vector<uint64_t>& sps);
163 
164   struct Stat {
165     size_t chain_count = 0u;
166     size_t before_join_node_count = 0u;
167     size_t after_join_node_count = 0u;
168     size_t after_join_max_chain_length = 0u;
169   };
170   void DumpStat();
GetStat()171   const Stat& GetStat() {
172     return stat_;
173   }
GetCacheStat()174   const call_chain_joiner_impl::LRUCacheStat& GetCacheStat() {
175     return cache_stat_;
176   }
177 
178  private:
179 
180   bool keep_original_callchains_;
181   FILE* original_chains_fp_;
182   FILE* joined_chains_fp_;
183   size_t next_chain_index_;
184   call_chain_joiner_impl::LRUCacheStat cache_stat_;
185   Stat stat_;
186 };
187 
188 }  // namespace simpleperf
189 
190 #endif  // SIMPLE_PERF_CALLCHAIN_JOINER_H_
191