1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CallChainJoiner.h"
18 
19 #include <android-base/logging.h>
20 
21 #include "environment.h"
22 #include "utils.h"
23 
24 namespace simpleperf {
25 namespace call_chain_joiner_impl {
26 
LRUCache(size_t cache_size,size_t matched_node_count_to_extend_callchain)27 LRUCache::LRUCache(size_t cache_size, size_t matched_node_count_to_extend_callchain)
28     : node_set_(1024 * 16, CacheNodeHash, CacheNodeEqual) {
29   cache_stat_.cache_size = cache_size;
30   cache_stat_.max_node_count = cache_size / sizeof(CacheNode);
31   CHECK_GE(cache_stat_.max_node_count, 2u);
32   CHECK_GE(matched_node_count_to_extend_callchain, 1u);
33   cache_stat_.matched_node_count_to_extend_callchain = matched_node_count_to_extend_callchain;
34   nodes_ = new CacheNode[cache_stat_.max_node_count + 1]; // with 1 sentinel node
35   // nodes_[0] is the sentinel node of the LRU linked list.
36   nodes_[0].is_leaf = 1;
37   nodes_[0].parent_index = 0;
38   nodes_[0].leaf_link_prev = nodes_[0].leaf_link_next = 0;
39 }
40 
~LRUCache()41 LRUCache::~LRUCache() {
42   delete[] nodes_;
43 }
44 
AddCallChain(pid_t tid,std::vector<uint64_t> & ips,std::vector<uint64_t> & sps)45 void LRUCache::AddCallChain(pid_t tid, std::vector<uint64_t>& ips, std::vector<uint64_t>& sps) {
46   // 1. Build current call chain.
47   std::vector<CacheNode*> chain;
48   for (size_t i = 0; i < ips.size(); ++i) {
49     CacheNode* node = GetNode(tid, ips[i], sps[i]);
50     chain.push_back(node);
51   }
52 
53   // 2. Check if the (tid, ip, sp) tuples of the top [matched_node_count_to_extend_callchain]
54   // nodes of current call chain exist in the cache and have the same relation as in current
55   // call chain. If true, we can extend current call chain.
56   bool can_extend = true;
57   if (chain.size() < cache_stat_.matched_node_count_to_extend_callchain) {
58     can_extend = false;
59   } else {
60     size_t chain_pos = chain.size() - 2;
61     for (size_t i = 1; i < cache_stat_.matched_node_count_to_extend_callchain; ++i) {
62       if (GetParent(chain[chain_pos]) != chain[chain_pos + 1]) {
63         can_extend = false;
64         break;
65       }
66     }
67   }
68   std::vector<uint64_t> origin_ip = ips;
69 
70   // 3. Add current call chain to the cache.
71   for (size_t i = 0; i + 1 < chain.size(); ++i) {
72     LinkParent(chain[i], chain[i + 1]);
73   }
74   // 4. Optionally extend current call chain.
75   if (can_extend) {
76     CacheNode* top = chain.back();
77     while ((top = GetParent(top)) != nullptr) {
78       if (top->sp == chain.back()->sp) {
79         // This is to prevent a loop case as below, which is unlikely to happen.
80         // The cache has node A.parent = B, while current call chain has node B.parent = A.
81         // This makes a loop of A -> B -> A -> B...
82         bool has_loop = false;
83         for (auto it = chain.rbegin(); it != chain.rend() && (*it)->sp == top->sp; ++it) {
84           if ((*it)->ip == top->ip) {
85             has_loop = true;
86             break;
87           }
88         }
89         if (has_loop) {
90           UnlinkParent(chain.back());
91           ips.resize(chain.size());
92           sps.resize(chain.size());
93           break;
94         }
95       }
96       ips.push_back(top->ip);
97       sps.push_back(top->sp);
98     }
99   } else {
100     UnlinkParent(chain.back());
101   }
102 }
103 
CacheNodeEqual(const CacheNode * n1,const CacheNode * n2)104 bool LRUCache::CacheNodeEqual(const CacheNode* n1, const CacheNode* n2) {
105   return n1->tid == n2->tid && n1->ip == n2->ip && n1->sp == n2->sp;
106 }
107 
CacheNodeHash(const CacheNode * n)108 size_t LRUCache::CacheNodeHash(const CacheNode* n) {
109   return static_cast<size_t>(n->tid ^ n->ip ^ n->sp);
110 }
111 
GetNode(uint32_t tid,uint64_t ip,uint64_t sp)112 CacheNode* LRUCache::GetNode(uint32_t tid, uint64_t ip, uint64_t sp) {
113   CacheNode* node = FindNode(tid, ip, sp);
114   if (node != nullptr) {
115     if (node->is_leaf) {
116       // Update the node's position in the LRU linked list.
117       RemoveNodeFromLRUList(node);
118       AppendNodeToLRUList(node);
119     }
120     return node;
121   }
122   node = AllocNode();
123   node->tid = tid;
124   node->ip = ip;
125   node->sp = sp;
126   node->is_leaf = 1;
127   node->parent_index = 0;
128   node->leaf_link_prev = node->leaf_link_next = GetNodeIndex(node);
129   node_set_.insert(node);
130   AppendNodeToLRUList(node);
131   return node;
132 }
133 
AllocNode()134 CacheNode* LRUCache::AllocNode() {
135   if (cache_stat_.used_node_count < cache_stat_.max_node_count) {
136     return &nodes_[++cache_stat_.used_node_count];
137   }
138   // Recycle the node at the front of the LRU linked list.
139   CacheNode* node = &nodes_[nodes_->leaf_link_next];
140   RemoveNodeFromLRUList(node);
141   node_set_.erase(node);
142   CacheNode* parent = GetParent(node);
143   if (parent != nullptr) {
144     DecreaseChildCountOfNode(parent);
145   }
146   cache_stat_.recycled_node_count++;
147   return node;
148 }
149 
LinkParent(CacheNode * child,CacheNode * new_parent)150 void LRUCache::LinkParent(CacheNode* child, CacheNode* new_parent) {
151   CacheNode* old_parent = GetParent(child);
152   if (old_parent != nullptr) {
153     DecreaseChildCountOfNode(old_parent);
154   }
155   child->parent_index = GetNodeIndex(new_parent);
156   if (new_parent->is_leaf) {
157     RemoveNodeFromLRUList(new_parent);
158     new_parent->is_leaf = 0;
159     new_parent->children_count = 1;
160   } else {
161     new_parent->children_count++;
162   }
163 }
164 
UnlinkParent(CacheNode * child)165 void LRUCache::UnlinkParent(CacheNode* child) {
166   CacheNode* old_parent = GetParent(child);
167   if (old_parent != nullptr) {
168     DecreaseChildCountOfNode(old_parent);
169   }
170   child->parent_index = 0;
171 }
172 
173 }  // call_chain_joiner_impl
174 
175 using namespace call_chain_joiner_impl;
176 
WriteCallChain(FILE * fp,pid_t pid,pid_t tid,CallChainJoiner::ChainType type,const std::vector<uint64_t> & ips,const std::vector<uint64_t> & sps,size_t ip_count)177 static bool WriteCallChain(FILE* fp, pid_t pid, pid_t tid, CallChainJoiner::ChainType type,
178                            const std::vector<uint64_t>& ips,
179                            const std::vector<uint64_t>& sps,
180                            size_t ip_count) {
181   // Below is the content of a call chain stored in file.
182   //   uint32_t pid;
183   //   uint32_t tid;
184   //   uint32_t chain_type;
185   //   uint32_t ip_count;
186   //   uint64_t ips[];
187   //   uint64_t sps[];
188   //   uint32_t size;
189   uint32_t size = 4 * sizeof(uint32_t) + sizeof(uint64_t) * ip_count * 2 + sizeof(uint32_t);
190   std::vector<char> data(size);
191   char* p = data.data();
192   MoveToBinaryFormat(pid, p);
193   MoveToBinaryFormat(tid, p);
194   MoveToBinaryFormat(type, p);
195   MoveToBinaryFormat(static_cast<uint32_t>(ip_count), p);
196   MoveToBinaryFormat(ips.data(), ip_count, p);
197   MoveToBinaryFormat(sps.data(), ip_count, p);
198   MoveToBinaryFormat(size, p);
199   if (fwrite(data.data(), size, 1, fp) != 1) {
200     PLOG(ERROR) << "fwrite";
201     return false;
202   }
203   return true;
204 }
205 
ReadCallChain(FILE * fp,pid_t & pid,pid_t & tid,CallChainJoiner::ChainType & type,std::vector<uint64_t> & ips,std::vector<uint64_t> & sps)206 static bool ReadCallChain(FILE* fp, pid_t& pid, pid_t& tid, CallChainJoiner::ChainType& type,
207                           std::vector<uint64_t>& ips, std::vector<uint64_t>& sps) {
208   std::vector<char> data(4 * sizeof(uint32_t));
209   if (fread(data.data(), data.size(), 1, fp) != 1) {
210     PLOG(ERROR) << "fread";
211     return false;
212   }
213   const char* p = data.data();
214   MoveFromBinaryFormat(pid, p);
215   MoveFromBinaryFormat(tid, p);
216   MoveFromBinaryFormat(type, p);
217   uint32_t ip_count;
218   MoveFromBinaryFormat(ip_count, p);
219   data.resize(sizeof(uint64_t) * ip_count * 2 + sizeof(uint32_t));
220   if (fread(data.data(), data.size(), 1, fp) != 1) {
221     PLOG(ERROR) << "fread";
222     return false;
223   }
224   p = data.data();
225   ips.resize(ip_count);
226   MoveFromBinaryFormat(ips.data(), ip_count, p);
227   sps.resize(ip_count);
228   MoveFromBinaryFormat(sps.data(), ip_count, p);
229   return true;
230 }
231 
ReadCallChainInReverseOrder(FILE * fp,pid_t & pid,pid_t & tid,CallChainJoiner::ChainType & type,std::vector<uint64_t> & ips,std::vector<uint64_t> & sps)232 static bool ReadCallChainInReverseOrder(FILE* fp, pid_t& pid, pid_t& tid,
233                                         CallChainJoiner::ChainType& type,
234                                         std::vector<uint64_t>& ips,
235                                         std::vector<uint64_t>& sps) {
236   uint32_t size;
237   if (fseek(fp, -4, SEEK_CUR) != 0 || fread(&size, sizeof(size), 1, fp) != 1) {
238     PLOG(ERROR) << "fread";
239     return false;
240   }
241   std::vector<char> data(size - 4);
242   if (fseek(fp, -static_cast<int>(size), SEEK_CUR) != 0 ||
243       fread(data.data(), data.size(), 1, fp) != 1 ||
244       fseek(fp, -static_cast<int>(data.size()), SEEK_CUR) != 0) {
245     PLOG(ERROR) << "fread";
246     return false;
247   }
248   const char* p = data.data();
249   MoveFromBinaryFormat(pid, p);
250   MoveFromBinaryFormat(tid, p);
251   MoveFromBinaryFormat(type, p);
252   uint32_t ip_count;
253   MoveFromBinaryFormat(ip_count, p);
254   ips.resize(ip_count);
255   MoveFromBinaryFormat(ips.data(), ip_count, p);
256   sps.resize(ip_count);
257   MoveFromBinaryFormat(sps.data(), ip_count, p);
258   return true;
259 }
260 
CreateTempFp()261 static FILE* CreateTempFp() {
262   std::unique_ptr<TemporaryFile> tmpfile = ScopedTempFiles::CreateTempFile();
263   FILE* fp = fdopen(tmpfile->release(), "web+");
264   if (fp == nullptr) {
265     PLOG(ERROR) << "fdopen";
266     return nullptr;
267   }
268   return fp;
269 }
270 
CallChainJoiner(size_t cache_size,size_t matched_node_count_to_extend_callchain,bool keep_original_callchains)271 CallChainJoiner::CallChainJoiner(size_t cache_size, size_t matched_node_count_to_extend_callchain,
272                                  bool keep_original_callchains)
273     : keep_original_callchains_(keep_original_callchains),
274       original_chains_fp_(nullptr),
275       joined_chains_fp_(nullptr),
276       next_chain_index_(0u) {
277   cache_stat_.cache_size = cache_size;
278   cache_stat_.matched_node_count_to_extend_callchain = matched_node_count_to_extend_callchain;
279 }
280 
~CallChainJoiner()281 CallChainJoiner::~CallChainJoiner() {
282   if (original_chains_fp_ != nullptr) {
283     fclose(original_chains_fp_);
284   }
285   if (joined_chains_fp_ != nullptr) {
286     fclose(joined_chains_fp_);
287   }
288 }
289 
AddCallChain(pid_t pid,pid_t tid,ChainType type,const std::vector<uint64_t> & ips,const std::vector<uint64_t> & sps)290 bool CallChainJoiner::AddCallChain(pid_t pid, pid_t tid, ChainType type,
291                                    const std::vector<uint64_t>& ips,
292                                    const std::vector<uint64_t>& sps) {
293   CHECK_EQ(ips.size(), sps.size());
294   CHECK_GT(ips.size(), 0u);
295   CHECK(type == ORIGINAL_OFFLINE || type == ORIGINAL_REMOTE);
296   // Make sure sps are in non-decreasing order, and there is no duplicated items.
297   size_t ip_count = ips.size();
298   for (size_t i = 1; i < ips.size(); ++i) {
299     if (sps[i] < sps[i - 1]) {
300       ip_count = i;
301       break;
302     } else if (sps[i] == sps[i - 1]) {
303       bool has_duplicated_items = false;
304       for (size_t j = i; j > 0 && sps[j - 1] == sps[i]; --j) {
305         if (ips[j - 1] == ips[i]) {
306           has_duplicated_items = true;
307           break;
308         }
309       }
310       if (has_duplicated_items) {
311         ip_count = i;
312         break;
313       }
314     }
315   }
316 
317   if (original_chains_fp_ == nullptr) {
318     original_chains_fp_ = CreateTempFp();
319     if (original_chains_fp_ == nullptr) {
320       return false;
321     }
322   }
323   stat_.chain_count++;
324   return WriteCallChain(original_chains_fp_, pid, tid, type, ips, sps, ip_count);
325 }
326 
JoinCallChains()327 bool CallChainJoiner::JoinCallChains() {
328   if (stat_.chain_count == 0u) {
329     return true;
330   }
331   LRUCache cache(cache_stat_.cache_size, cache_stat_.matched_node_count_to_extend_callchain);
332   std::unique_ptr<FILE, decltype(&fclose)> tmp_fp(CreateTempFp(), fclose);
333   if (!tmp_fp) {
334     return false;
335   }
336   joined_chains_fp_ = CreateTempFp();
337   if (joined_chains_fp_ == nullptr) {
338     return false;
339   }
340   pid_t pid;
341   pid_t tid;
342   ChainType type;
343   std::vector<uint64_t> ips;
344   std::vector<uint64_t> sps;
345   if (fseek(original_chains_fp_, 0, SEEK_END) != 0) {
346     PLOG(ERROR) << "fseek";
347     return false;
348   }
349   std::vector<std::pair<FILE*, FILE*>> file_pairs = {
350       std::make_pair(original_chains_fp_, tmp_fp.get()),
351       std::make_pair(tmp_fp.get(), joined_chains_fp_)
352   };
353   for (size_t pass = 0; pass < 2u; ++pass) {
354     auto& pair = file_pairs[pass];
355     for (size_t i = 0; i < stat_.chain_count; ++i) {
356       if (!ReadCallChainInReverseOrder(pair.first, pid, tid, type, ips, sps)) {
357         return false;
358       }
359       if (pass == 0u) {
360         if (type == ORIGINAL_OFFLINE) {
361           type = JOINED_OFFLINE;
362         } else if (type == ORIGINAL_REMOTE) {
363           type = JOINED_REMOTE;
364         }
365         stat_.before_join_node_count += ips.size();
366       }
367 
368       cache.AddCallChain(tid, ips, sps);
369 
370       if (pass == 1u) {
371         stat_.after_join_node_count += ips.size();
372         stat_.after_join_max_chain_length = std::max(stat_.after_join_max_chain_length, ips.size());
373       }
374 
375       if (!WriteCallChain(pair.second, pid, tid, type, ips, sps, ips.size())) {
376         return false;
377       }
378     }
379   }
380   cache_stat_ = cache.Stat();
381   return true;
382 }
383 
GetNextCallChain(pid_t & pid,pid_t & tid,ChainType & type,std::vector<uint64_t> & ips,std::vector<uint64_t> & sps)384 bool CallChainJoiner::GetNextCallChain(pid_t& pid, pid_t& tid, ChainType& type,
385                                        std::vector<uint64_t>& ips,
386                                        std::vector<uint64_t>& sps) {
387   if (next_chain_index_ == stat_.chain_count * 2) {
388     // No more chains.
389     return false;
390   }
391   if (next_chain_index_ == 0u) {
392     if (fseek(original_chains_fp_, 0, SEEK_SET) != 0 ||
393         fseek(joined_chains_fp_, 0, SEEK_SET) != 0) {
394       PLOG(ERROR) << "fseek";
395       return false;
396     }
397   }
398   FILE* fp;
399   if (keep_original_callchains_) {
400     fp = (next_chain_index_ & 1) ? joined_chains_fp_ : original_chains_fp_;
401     next_chain_index_++;
402   } else {
403     fp = joined_chains_fp_;
404     next_chain_index_ += 2;
405   }
406   return ReadCallChain(fp, pid, tid, type, ips, sps);
407 }
408 
DumpStat()409 void CallChainJoiner::DumpStat() {
410   LOG(DEBUG) << "call chain joiner stat:";
411   LOG(DEBUG) << "  cache_size: " << cache_stat_.cache_size;
412   LOG(DEBUG) << "  matched_node_count_to_extend_callchain: "
413              << cache_stat_.matched_node_count_to_extend_callchain;
414   LOG(DEBUG) << "  max_node_count in cache: " << cache_stat_.max_node_count;
415   LOG(DEBUG) << "  used_node_count in cache: " << cache_stat_.used_node_count;
416   LOG(DEBUG) << "  recycled_node_count in cache: " << cache_stat_.recycled_node_count;
417   LOG(DEBUG) << "  call_chain_count: " << stat_.chain_count;
418   LOG(DEBUG) << "  before_join_node_count: " << stat_.before_join_node_count;
419   if (stat_.chain_count > 0u) {
420     LOG(DEBUG) << "  before_join_average_chain_length: "
421                << (stat_.before_join_node_count * 1.0 / stat_.chain_count);
422   }
423   LOG(DEBUG) << "  after_join_node_count: " << stat_.after_join_node_count;
424   if (stat_.chain_count > 0u) {
425     LOG(DEBUG) << "  after_join_average_chain_length: "
426                << (stat_.after_join_node_count * 1.0 / stat_.chain_count);
427   }
428   LOG(DEBUG) << "  after_join_max_chain_length: " << stat_.after_join_max_chain_length;
429 }
430 
431 }  // namespace simpleperf
432