1 //===-- TraceHTR.cpp ------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "TraceHTR.h"
10 
11 #include "lldb/Symbol/Function.h"
12 #include "lldb/Target/Process.h"
13 #include "lldb/Target/Target.h"
14 #include "llvm/Support/JSON.h"
15 #include <sstream>
16 #include <string>
17 
18 using namespace lldb_private;
19 using namespace lldb;
20 
21 size_t HTRBlockMetadata::GetNumInstructions() const {
22   return m_num_instructions;
23 }
24 
25 llvm::Optional<llvm::StringRef>
26 HTRBlockMetadata::GetMostFrequentlyCalledFunction() const {
27   size_t max_ncalls = 0;
28   llvm::Optional<llvm::StringRef> max_name = llvm::None;
29   for (const auto &it : m_func_calls) {
30     ConstString name = it.first;
31     size_t ncalls = it.second;
32     if (ncalls > max_ncalls) {
33       max_ncalls = ncalls;
34       max_name = name.GetStringRef();
35     }
36   }
37   return max_name;
38 }
39 
40 llvm::DenseMap<ConstString, size_t> const &
41 HTRBlockMetadata::GetFunctionCalls() const {
42   return m_func_calls;
43 }
44 
45 lldb::addr_t HTRBlockMetadata::GetFirstInstructionLoadAddress() const {
46   return m_first_instruction_load_address;
47 }
48 
49 size_t HTRBlock::GetOffset() const { return m_offset; }
50 
51 size_t HTRBlock::GetSize() const { return m_size; }
52 
53 HTRBlockMetadata const &HTRBlock::GetMetadata() const { return m_metadata; }
54 
55 llvm::ArrayRef<HTRBlockLayerUP> TraceHTR::GetBlockLayers() const {
56   return m_block_layer_ups;
57 }
58 
59 HTRInstructionLayer const &TraceHTR::GetInstructionLayer() const {
60   return *m_instruction_layer_up;
61 }
62 
63 void TraceHTR::AddNewBlockLayer(HTRBlockLayerUP &&block_layer) {
64   m_block_layer_ups.emplace_back(std::move(block_layer));
65 }
66 
67 size_t IHTRLayer::GetLayerId() const { return m_layer_id; }
68 
69 void HTRBlockLayer::AppendNewBlock(size_t block_id, HTRBlock &&block) {
70   m_block_id_trace.emplace_back(block_id);
71   m_block_defs.emplace(block_id, block);
72 }
73 
74 void HTRBlockLayer::AppendRepeatedBlock(size_t block_id) {
75   m_block_id_trace.emplace_back(block_id);
76 }
77 
78 llvm::ArrayRef<lldb::addr_t> HTRInstructionLayer::GetInstructionTrace() const {
79   return m_instruction_trace;
80 }
81 
82 void HTRInstructionLayer::AddCallInstructionMetadata(
83     lldb::addr_t load_addr, llvm::Optional<ConstString> func_name) {
84   m_call_isns.emplace(load_addr, func_name);
85 }
86 
87 void HTRInstructionLayer::AppendInstruction(lldb::addr_t load_addr) {
88   m_instruction_trace.emplace_back(load_addr);
89 }
90 
91 HTRBlock const *HTRBlockLayer::GetBlockById(size_t block_id) const {
92   auto block_it = m_block_defs.find(block_id);
93   if (block_it == m_block_defs.end())
94     return nullptr;
95   else
96     return &block_it->second;
97 }
98 
99 llvm::ArrayRef<size_t> HTRBlockLayer::GetBlockIdTrace() const {
100   return m_block_id_trace;
101 }
102 
103 size_t HTRBlockLayer::GetNumUnits() const { return m_block_id_trace.size(); }
104 
105 HTRBlockMetadata HTRInstructionLayer::GetMetadataByIndex(size_t index) const {
106   lldb::addr_t instruction_load_address = m_instruction_trace[index];
107   llvm::DenseMap<ConstString, size_t> func_calls;
108 
109   auto func_name_it = m_call_isns.find(instruction_load_address);
110   if (func_name_it != m_call_isns.end()) {
111     if (llvm::Optional<ConstString> func_name = func_name_it->second) {
112       func_calls[*func_name] = 1;
113     }
114   }
115   return {instruction_load_address, 1, std::move(func_calls)};
116 }
117 
118 size_t HTRInstructionLayer::GetNumUnits() const {
119   return m_instruction_trace.size();
120 }
121 
122 HTRBlockMetadata HTRBlockLayer::GetMetadataByIndex(size_t index) const {
123   size_t block_id = m_block_id_trace[index];
124   HTRBlock block = m_block_defs.find(block_id)->second;
125   return block.GetMetadata();
126 }
127 
128 TraceHTR::TraceHTR(Thread &thread, TraceCursor &cursor)
129     : m_instruction_layer_up(std::make_unique<HTRInstructionLayer>(0)) {
130 
131   // Move cursor to the first instruction in the trace
132   cursor.SetForwards(true);
133   cursor.Seek(0, TraceCursor::SeekType::Set);
134 
135   Target &target = thread.GetProcess()->GetTarget();
136   auto function_name_from_load_address =
137       [&](lldb::addr_t load_address) -> llvm::Optional<ConstString> {
138     lldb_private::Address pc_addr;
139     SymbolContext sc;
140     if (target.ResolveLoadAddress(load_address, pc_addr) &&
141         pc_addr.CalculateSymbolContext(&sc))
142       return sc.GetFunctionName()
143                  ? llvm::Optional<ConstString>(sc.GetFunctionName())
144                  : llvm::None;
145     else
146       return llvm::None;
147   };
148 
149   bool more_data_in_trace = true;
150   while (more_data_in_trace) {
151     if (cursor.IsError()) {
152       // Append a load address of 0 for all instructions that an error occured
153       // while decoding.
154       // TODO: Make distinction between errors by storing the error messages.
155       // Currently, all errors are treated the same.
156       m_instruction_layer_up->AppendInstruction(0);
157       more_data_in_trace = cursor.Next();
158     } else {
159       lldb::addr_t current_instruction_load_address = cursor.GetLoadAddress();
160       lldb::TraceInstructionControlFlowType current_instruction_type =
161           cursor.GetInstructionControlFlowType();
162 
163       m_instruction_layer_up->AppendInstruction(
164           current_instruction_load_address);
165       more_data_in_trace = cursor.Next();
166       if (current_instruction_type &
167           lldb::eTraceInstructionControlFlowTypeCall) {
168         if (more_data_in_trace && !cursor.IsError()) {
169           m_instruction_layer_up->AddCallInstructionMetadata(
170               current_instruction_load_address,
171               function_name_from_load_address(cursor.GetLoadAddress()));
172         } else {
173           // Next instruction is not known - pass None to indicate the name
174           // of the function being called is not known
175           m_instruction_layer_up->AddCallInstructionMetadata(
176               current_instruction_load_address, llvm::None);
177         }
178       }
179     }
180   }
181 }
182 
183 void HTRBlockMetadata::MergeMetadata(
184     HTRBlockMetadata &merged_metadata,
185     HTRBlockMetadata const &metadata_to_merge) {
186   merged_metadata.m_num_instructions += metadata_to_merge.m_num_instructions;
187   for (const auto &it : metadata_to_merge.m_func_calls) {
188     ConstString name = it.first;
189     size_t num_calls = it.second;
190     merged_metadata.m_func_calls[name] += num_calls;
191   }
192 }
193 
194 HTRBlock IHTRLayer::MergeUnits(size_t start_unit_index, size_t num_units) {
195   // TODO: make this function take `end_unit_index` as a parameter instead of
196   // unit and merge the range [start_unit_indx, end_unit_index] inclusive.
197   HTRBlockMetadata merged_metadata = GetMetadataByIndex(start_unit_index);
198   for (size_t i = start_unit_index + 1; i < start_unit_index + num_units; i++) {
199     // merge the new metadata into merged_metadata
200     HTRBlockMetadata::MergeMetadata(merged_metadata, GetMetadataByIndex(i));
201   }
202   return {start_unit_index, num_units, merged_metadata};
203 }
204 
205 void TraceHTR::ExecutePasses() {
206   auto are_passes_done = [](IHTRLayer &l1, IHTRLayer &l2) {
207     return l1.GetNumUnits() == l2.GetNumUnits();
208   };
209   HTRBlockLayerUP current_block_layer_up =
210       BasicSuperBlockMerge(*m_instruction_layer_up);
211   HTRBlockLayer &current_block_layer = *current_block_layer_up;
212   if (are_passes_done(*m_instruction_layer_up, *current_block_layer_up))
213     return;
214 
215   AddNewBlockLayer(std::move(current_block_layer_up));
216   while (true) {
217     HTRBlockLayerUP new_block_layer_up =
218         BasicSuperBlockMerge(current_block_layer);
219     if (are_passes_done(current_block_layer, *new_block_layer_up))
220       return;
221 
222     current_block_layer = *new_block_layer_up;
223     AddNewBlockLayer(std::move(new_block_layer_up));
224   }
225 }
226 
227 llvm::Error TraceHTR::Export(std::string outfile) {
228   std::error_code ec;
229   llvm::raw_fd_ostream os(outfile, ec, llvm::sys::fs::OF_Text);
230   if (ec) {
231     return llvm::make_error<llvm::StringError>(
232         "unable to open destination file: " + outfile, os.error());
233   } else {
234     os << toJSON(*this);
235     os.close();
236     if (os.has_error()) {
237       return llvm::make_error<llvm::StringError>(
238           "unable to write to destination file: " + outfile, os.error());
239     }
240   }
241   return llvm::Error::success();
242 }
243 
244 HTRBlockLayerUP lldb_private::BasicSuperBlockMerge(IHTRLayer &layer) {
245   std::unique_ptr<HTRBlockLayer> new_block_layer =
246       std::make_unique<HTRBlockLayer>(layer.GetLayerId() + 1);
247 
248   if (layer.GetNumUnits()) {
249     // Future Improvement: split this into two functions - one for finding heads
250     // and tails, one for merging/creating the next layer A 'head' is defined to
251     // be a block whose occurrences in the trace do not have a unique preceding
252     // block.
253     std::unordered_set<size_t> heads;
254 
255     // The load address of the first instruction of a block is the unique ID for
256     // that block (i.e. blocks with the same first instruction load address are
257     // the same block)
258 
259     // Future Improvement: no need to store all its preceding block ids, all we
260     // care about is that there is more than one preceding block id, so an enum
261     // could be used
262     std::unordered_map<lldb::addr_t, std::unordered_set<lldb::addr_t>> head_map;
263     lldb::addr_t prev_id =
264         layer.GetMetadataByIndex(0).GetFirstInstructionLoadAddress();
265     size_t num_units = layer.GetNumUnits();
266     // This excludes the first unit since it has no previous unit
267     for (size_t i = 1; i < num_units; i++) {
268       lldb::addr_t current_id =
269           layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
270       head_map[current_id].insert(prev_id);
271       prev_id = current_id;
272     }
273     for (const auto &it : head_map) {
274       // ID of 0 represents an error - errors can't be heads or tails
275       lldb::addr_t id = it.first;
276       const std::unordered_set<lldb::addr_t> predecessor_set = it.second;
277       if (id && predecessor_set.size() > 1)
278         heads.insert(id);
279     }
280 
281     // Future Improvement: identify heads and tails in the same loop
282     // A 'tail' is defined to be a block whose occurrences in the trace do
283     // not have a unique succeeding block.
284     std::unordered_set<lldb::addr_t> tails;
285     std::unordered_map<lldb::addr_t, std::unordered_set<lldb::addr_t>> tail_map;
286 
287     // This excludes the last unit since it has no next unit
288     for (size_t i = 0; i < num_units - 1; i++) {
289       lldb::addr_t current_id =
290           layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
291       lldb::addr_t next_id =
292           layer.GetMetadataByIndex(i + 1).GetFirstInstructionLoadAddress();
293       tail_map[current_id].insert(next_id);
294     }
295 
296     // Mark last block as tail so the algorithm stops gracefully
297     lldb::addr_t last_id = layer.GetMetadataByIndex(num_units - 1)
298                                .GetFirstInstructionLoadAddress();
299     tails.insert(last_id);
300     for (const auto &it : tail_map) {
301       lldb::addr_t id = it.first;
302       const std::unordered_set<lldb::addr_t> successor_set = it.second;
303       // ID of 0 represents an error - errors can't be heads or tails
304       if (id && successor_set.size() > 1)
305         tails.insert(id);
306     }
307 
308     // Need to keep track of size of string since things we push are variable
309     // length
310     size_t superblock_size = 0;
311     // Each super block always has the same first unit (we call this the
312     // super block head) This gurantee allows us to use the super block head as
313     // the unique key mapping to the super block it begins
314     llvm::Optional<size_t> superblock_head = llvm::None;
315     auto construct_next_layer = [&](size_t merge_start, size_t n) -> void {
316       if (!superblock_head)
317         return;
318       if (new_block_layer->GetBlockById(*superblock_head)) {
319         new_block_layer->AppendRepeatedBlock(*superblock_head);
320       } else {
321         HTRBlock new_block = layer.MergeUnits(merge_start, n);
322         new_block_layer->AppendNewBlock(*superblock_head, std::move(new_block));
323       }
324     };
325 
326     for (size_t i = 0; i < num_units; i++) {
327       lldb::addr_t unit_id =
328           layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
329       auto isHead = heads.count(unit_id) > 0;
330       auto isTail = tails.count(unit_id) > 0;
331 
332       if (isHead && isTail) {
333         // Head logic
334         if (superblock_size) { // this handles (tail, head) adjacency -
335                                // otherwise an empty
336                                // block is created
337           // End previous super block
338           construct_next_layer(i - superblock_size, superblock_size);
339         }
340         // Current id is first in next super block since it's a head
341         superblock_head = unit_id;
342         superblock_size = 1;
343 
344         // Tail logic
345         construct_next_layer(i - superblock_size + 1, superblock_size);
346         // Reset the block_head since the prev super block has come to and end
347         superblock_head = llvm::None;
348         superblock_size = 0;
349       } else if (isHead) {
350         if (superblock_size) { // this handles (tail, head) adjacency -
351                                // otherwise an empty
352                                // block is created
353           // End previous super block
354           construct_next_layer(i - superblock_size, superblock_size);
355         }
356         // Current id is first in next super block since it's a head
357         superblock_head = unit_id;
358         superblock_size = 1;
359       } else if (isTail) {
360         if (!superblock_head)
361           superblock_head = unit_id;
362         superblock_size++;
363 
364         // End previous super block
365         construct_next_layer(i - superblock_size + 1, superblock_size);
366         // Reset the block_head since the prev super block has come to and end
367         superblock_head = llvm::None;
368         superblock_size = 0;
369       } else {
370         if (!superblock_head)
371           superblock_head = unit_id;
372         superblock_size++;
373       }
374     }
375   }
376   return new_block_layer;
377 }
378 
379 llvm::json::Value lldb_private::toJSON(const TraceHTR &htr) {
380   std::vector<llvm::json::Value> layers_as_json;
381   for (size_t i = 0; i < htr.GetInstructionLayer().GetInstructionTrace().size();
382        i++) {
383     size_t layer_id = htr.GetInstructionLayer().GetLayerId();
384     HTRBlockMetadata metadata = htr.GetInstructionLayer().GetMetadataByIndex(i);
385     lldb::addr_t load_address = metadata.GetFirstInstructionLoadAddress();
386 
387     std::string display_name;
388 
389     std::stringstream stream;
390     stream << "0x" << std::hex << load_address;
391     std::string load_address_hex_string(stream.str());
392     display_name.assign(load_address_hex_string);
393 
394     // name: load address of the first instruction of the block and the name
395     // of the most frequently called function from the block (if applicable)
396 
397     // ph: the event type - 'X' for Complete events (see link to documentation
398     // below)
399 
400     // Since trace timestamps aren't yet supported in HTR, the ts (timestamp) is
401     // based on the instruction's offset in the trace and the dur (duration) is
402     // 1 since this layer contains single instructions. Using the instruction
403     // offset and a duration of 1 oversimplifies the true timing information of
404     // the trace, nonetheless, these approximate timestamps/durations provide an
405     // clear visualization of the trace.
406 
407     // ts: offset from the beginning of the trace for the first instruction in
408     // the block
409 
410     // dur: 1 since this layer contains single instructions.
411 
412     // pid: the ID of the HTR layer the blocks belong to
413 
414     // See
415     // https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#heading=h.j75x71ritcoy
416     // for documentation on the Trace Event Format
417     layers_as_json.emplace_back(llvm::json::Object{
418         {"name", display_name},
419         {"ph", "X"},
420         {"ts", (int64_t)i},
421         {"dur", 1},
422         {"pid", (int64_t)layer_id},
423     });
424   }
425 
426   for (const auto &layer : htr.GetBlockLayers()) {
427     size_t start_ts = 0;
428     std::vector<size_t> block_id_trace = layer->GetBlockIdTrace();
429     for (size_t i = 0; i < block_id_trace.size(); i++) {
430       size_t id = block_id_trace[i];
431       // Guranteed that this ID is valid, so safe to dereference here.
432       HTRBlock block = *layer->GetBlockById(id);
433       llvm::json::Value block_json = toJSON(block);
434       size_t layer_id = layer->GetLayerId();
435 
436       HTRBlockMetadata metadata = block.GetMetadata();
437 
438       llvm::Optional<llvm::StringRef> most_freq_func =
439           metadata.GetMostFrequentlyCalledFunction();
440       std::stringstream stream;
441       stream << "0x" << std::hex << metadata.GetFirstInstructionLoadAddress();
442       std::string offset_hex_string(stream.str());
443       std::string display_name =
444           most_freq_func ? offset_hex_string + ": " + most_freq_func->str()
445                          : offset_hex_string;
446 
447       // Since trace timestamps aren't yet supported in HTR, the ts (timestamp)
448       // and dur (duration) are based on the block's offset in the trace and
449       // number of instructions in the block, respectively. Using the block
450       // offset and the number of instructions oversimplifies the true timing
451       // information of the trace, nonetheless, these approximate
452       // timestamps/durations provide an understandable visualization of the
453       // trace.
454       auto duration = metadata.GetNumInstructions();
455       layers_as_json.emplace_back(llvm::json::Object{
456           {"name", display_name},
457           {"ph", "X"},
458           {"ts", (int64_t)start_ts},
459           {"dur", (int64_t)duration},
460           {"pid", (int64_t)layer_id},
461           {"args", block_json},
462       });
463       start_ts += duration;
464     }
465   }
466   return layers_as_json;
467 }
468 
469 llvm::json::Value lldb_private::toJSON(const HTRBlock &block) {
470   return llvm::json::Value(
471       llvm::json::Object{{"Metadata", block.GetMetadata()}});
472 }
473 
474 llvm::json::Value lldb_private::toJSON(const HTRBlockMetadata &metadata) {
475   std::vector<llvm::json::Value> function_calls;
476   for (const auto &it : metadata.GetFunctionCalls()) {
477     ConstString name = it.first;
478     size_t n_calls = it.second;
479     function_calls.emplace_back(llvm::formatv("({0}: {1})", name, n_calls));
480   }
481 
482   return llvm::json::Value(llvm::json::Object{
483       {"Number of Instructions", (ssize_t)metadata.GetNumInstructions()},
484       {"Functions", function_calls}});
485 }
486