1 //===-- TraceHTR.cpp ------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "TraceHTR.h"
10 
11 #include "lldb/Symbol/Function.h"
12 #include "lldb/Target/Process.h"
13 #include "lldb/Target/Target.h"
14 #include "llvm/Support/JSON.h"
15 #include <optional>
16 #include <sstream>
17 #include <string>
18 
19 using namespace lldb_private;
20 using namespace lldb;
21 
22 size_t HTRBlockMetadata::GetNumInstructions() const {
23   return m_num_instructions;
24 }
25 
26 std::optional<llvm::StringRef>
27 HTRBlockMetadata::GetMostFrequentlyCalledFunction() const {
28   size_t max_ncalls = 0;
29   std::optional<llvm::StringRef> max_name;
30   for (const auto &it : m_func_calls) {
31     ConstString name = it.first;
32     size_t ncalls = it.second;
33     if (ncalls > max_ncalls) {
34       max_ncalls = ncalls;
35       max_name = name.GetStringRef();
36     }
37   }
38   return max_name;
39 }
40 
41 llvm::DenseMap<ConstString, size_t> const &
42 HTRBlockMetadata::GetFunctionCalls() const {
43   return m_func_calls;
44 }
45 
46 lldb::addr_t HTRBlockMetadata::GetFirstInstructionLoadAddress() const {
47   return m_first_instruction_load_address;
48 }
49 
50 size_t HTRBlock::GetOffset() const { return m_offset; }
51 
52 size_t HTRBlock::GetSize() const { return m_size; }
53 
54 HTRBlockMetadata const &HTRBlock::GetMetadata() const { return m_metadata; }
55 
56 llvm::ArrayRef<HTRBlockLayerUP> TraceHTR::GetBlockLayers() const {
57   return m_block_layer_ups;
58 }
59 
60 HTRInstructionLayer const &TraceHTR::GetInstructionLayer() const {
61   return *m_instruction_layer_up;
62 }
63 
64 void TraceHTR::AddNewBlockLayer(HTRBlockLayerUP &&block_layer) {
65   m_block_layer_ups.emplace_back(std::move(block_layer));
66 }
67 
68 size_t IHTRLayer::GetLayerId() const { return m_layer_id; }
69 
70 void HTRBlockLayer::AppendNewBlock(size_t block_id, HTRBlock &&block) {
71   m_block_id_trace.emplace_back(block_id);
72   m_block_defs.emplace(block_id, std::move(block));
73 }
74 
75 void HTRBlockLayer::AppendRepeatedBlock(size_t block_id) {
76   m_block_id_trace.emplace_back(block_id);
77 }
78 
79 llvm::ArrayRef<lldb::addr_t> HTRInstructionLayer::GetInstructionTrace() const {
80   return m_instruction_trace;
81 }
82 
83 void HTRInstructionLayer::AddCallInstructionMetadata(
84     lldb::addr_t load_addr, std::optional<ConstString> func_name) {
85   m_call_isns.emplace(load_addr, func_name);
86 }
87 
88 void HTRInstructionLayer::AppendInstruction(lldb::addr_t load_addr) {
89   m_instruction_trace.emplace_back(load_addr);
90 }
91 
92 HTRBlock const *HTRBlockLayer::GetBlockById(size_t block_id) const {
93   auto block_it = m_block_defs.find(block_id);
94   if (block_it == m_block_defs.end())
95     return nullptr;
96   else
97     return &block_it->second;
98 }
99 
100 llvm::ArrayRef<size_t> HTRBlockLayer::GetBlockIdTrace() const {
101   return m_block_id_trace;
102 }
103 
104 size_t HTRBlockLayer::GetNumUnits() const { return m_block_id_trace.size(); }
105 
106 HTRBlockMetadata HTRInstructionLayer::GetMetadataByIndex(size_t index) const {
107   lldb::addr_t instruction_load_address = m_instruction_trace[index];
108   llvm::DenseMap<ConstString, size_t> func_calls;
109 
110   auto func_name_it = m_call_isns.find(instruction_load_address);
111   if (func_name_it != m_call_isns.end()) {
112     if (std::optional<ConstString> func_name = func_name_it->second) {
113       func_calls[*func_name] = 1;
114     }
115   }
116   return {instruction_load_address, 1, std::move(func_calls)};
117 }
118 
119 size_t HTRInstructionLayer::GetNumUnits() const {
120   return m_instruction_trace.size();
121 }
122 
123 HTRBlockMetadata HTRBlockLayer::GetMetadataByIndex(size_t index) const {
124   size_t block_id = m_block_id_trace[index];
125   HTRBlock block = m_block_defs.find(block_id)->second;
126   return block.GetMetadata();
127 }
128 
129 TraceHTR::TraceHTR(Thread &thread, TraceCursor &cursor)
130     : m_instruction_layer_up(std::make_unique<HTRInstructionLayer>(0)) {
131 
132   // Move cursor to the first instruction in the trace
133   cursor.SetForwards(true);
134   cursor.Seek(0, lldb::eTraceCursorSeekTypeBeginning);
135 
136   // TODO: fix after persona0220's patch on a new way to access instruction
137   // kinds
138   /*
139   Target &target = thread.GetProcess()->GetTarget();
140   auto function_name_from_load_address =
141       [&](lldb::addr_t load_address) -> std::optional<ConstString> {
142     lldb_private::Address pc_addr;
143     SymbolContext sc;
144     if (target.ResolveLoadAddress(load_address, pc_addr) &&
145         pc_addr.CalculateSymbolContext(&sc))
146       return sc.GetFunctionName()
147                  ? std::optional<ConstString>(sc.GetFunctionName())
148                  : std::nullopt;
149     else
150       return std::nullopt;
151   };
152 
153   while (cursor.HasValue()) { if (cursor.IsError()) {
154       // Append a load address of 0 for all instructions that an error occured
155       // while decoding.
156       // TODO: Make distinction between errors by storing the error messages.
157       // Currently, all errors are treated the same.
158       m_instruction_layer_up->AppendInstruction(0);
159       cursor.Next();
160     } else if (cursor.IsEvent()) {
161       cursor.Next();
162     } else {
163       lldb::addr_t current_instruction_load_address = cursor.GetLoadAddress();
164       lldb::InstructionControlFlowKind current_instruction_type =
165           cursor.GetInstructionControlFlowKind();
166 
167       m_instruction_layer_up->AppendInstruction(
168           current_instruction_load_address);
169       cursor.Next();
170       bool more_data_in_trace = cursor.HasValue();
171       if (current_instruction_type &
172           lldb::eInstructionControlFlowKindCall) {
173         if (more_data_in_trace && !cursor.IsError()) {
174           m_instruction_layer_up->AddCallInstructionMetadata(
175               current_instruction_load_address,
176               function_name_from_load_address(cursor.GetLoadAddress()));
177         } else {
178           // Next instruction is not known - pass None to indicate the name
179           // of the function being called is not known
180           m_instruction_layer_up->AddCallInstructionMetadata(
181               current_instruction_load_address, std::nullopt);
182         }
183       }
184     }
185   }
186   */
187 }
188 
189 void HTRBlockMetadata::MergeMetadata(
190     HTRBlockMetadata &merged_metadata,
191     HTRBlockMetadata const &metadata_to_merge) {
192   merged_metadata.m_num_instructions += metadata_to_merge.m_num_instructions;
193   for (const auto &it : metadata_to_merge.m_func_calls) {
194     ConstString name = it.first;
195     size_t num_calls = it.second;
196     merged_metadata.m_func_calls[name] += num_calls;
197   }
198 }
199 
200 HTRBlock IHTRLayer::MergeUnits(size_t start_unit_index, size_t num_units) {
201   // TODO: make this function take `end_unit_index` as a parameter instead of
202   // unit and merge the range [start_unit_indx, end_unit_index] inclusive.
203   HTRBlockMetadata merged_metadata = GetMetadataByIndex(start_unit_index);
204   for (size_t i = start_unit_index + 1; i < start_unit_index + num_units; i++) {
205     // merge the new metadata into merged_metadata
206     HTRBlockMetadata::MergeMetadata(merged_metadata, GetMetadataByIndex(i));
207   }
208   return {start_unit_index, num_units, merged_metadata};
209 }
210 
211 void TraceHTR::ExecutePasses() {
212   auto are_passes_done = [](IHTRLayer &l1, IHTRLayer &l2) {
213     return l1.GetNumUnits() == l2.GetNumUnits();
214   };
215   HTRBlockLayerUP current_block_layer_up =
216       BasicSuperBlockMerge(*m_instruction_layer_up);
217   HTRBlockLayer &current_block_layer = *current_block_layer_up;
218   if (are_passes_done(*m_instruction_layer_up, *current_block_layer_up))
219     return;
220 
221   AddNewBlockLayer(std::move(current_block_layer_up));
222   while (true) {
223     HTRBlockLayerUP new_block_layer_up =
224         BasicSuperBlockMerge(current_block_layer);
225     if (are_passes_done(current_block_layer, *new_block_layer_up))
226       return;
227 
228     current_block_layer = *new_block_layer_up;
229     AddNewBlockLayer(std::move(new_block_layer_up));
230   }
231 }
232 
233 llvm::Error TraceHTR::Export(std::string outfile) {
234   std::error_code ec;
235   llvm::raw_fd_ostream os(outfile, ec, llvm::sys::fs::OF_Text);
236   if (ec) {
237     return llvm::make_error<llvm::StringError>(
238         "unable to open destination file: " + outfile, os.error());
239   } else {
240     os << toJSON(*this);
241     os.close();
242     if (os.has_error()) {
243       return llvm::make_error<llvm::StringError>(
244           "unable to write to destination file: " + outfile, os.error());
245     }
246   }
247   return llvm::Error::success();
248 }
249 
250 HTRBlockLayerUP lldb_private::BasicSuperBlockMerge(IHTRLayer &layer) {
251   std::unique_ptr<HTRBlockLayer> new_block_layer =
252       std::make_unique<HTRBlockLayer>(layer.GetLayerId() + 1);
253 
254   if (layer.GetNumUnits()) {
255     // Future Improvement: split this into two functions - one for finding heads
256     // and tails, one for merging/creating the next layer A 'head' is defined to
257     // be a block whose occurrences in the trace do not have a unique preceding
258     // block.
259     std::unordered_set<size_t> heads;
260 
261     // The load address of the first instruction of a block is the unique ID for
262     // that block (i.e. blocks with the same first instruction load address are
263     // the same block)
264 
265     // Future Improvement: no need to store all its preceding block ids, all we
266     // care about is that there is more than one preceding block id, so an enum
267     // could be used
268     std::unordered_map<lldb::addr_t, std::unordered_set<lldb::addr_t>> head_map;
269     lldb::addr_t prev_id =
270         layer.GetMetadataByIndex(0).GetFirstInstructionLoadAddress();
271     size_t num_units = layer.GetNumUnits();
272     // This excludes the first unit since it has no previous unit
273     for (size_t i = 1; i < num_units; i++) {
274       lldb::addr_t current_id =
275           layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
276       head_map[current_id].insert(prev_id);
277       prev_id = current_id;
278     }
279     for (const auto &it : head_map) {
280       // ID of 0 represents an error - errors can't be heads or tails
281       lldb::addr_t id = it.first;
282       const std::unordered_set<lldb::addr_t> predecessor_set = it.second;
283       if (id && predecessor_set.size() > 1)
284         heads.insert(id);
285     }
286 
287     // Future Improvement: identify heads and tails in the same loop
288     // A 'tail' is defined to be a block whose occurrences in the trace do
289     // not have a unique succeeding block.
290     std::unordered_set<lldb::addr_t> tails;
291     std::unordered_map<lldb::addr_t, std::unordered_set<lldb::addr_t>> tail_map;
292 
293     // This excludes the last unit since it has no next unit
294     for (size_t i = 0; i < num_units - 1; i++) {
295       lldb::addr_t current_id =
296           layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
297       lldb::addr_t next_id =
298           layer.GetMetadataByIndex(i + 1).GetFirstInstructionLoadAddress();
299       tail_map[current_id].insert(next_id);
300     }
301 
302     // Mark last block as tail so the algorithm stops gracefully
303     lldb::addr_t last_id = layer.GetMetadataByIndex(num_units - 1)
304                                .GetFirstInstructionLoadAddress();
305     tails.insert(last_id);
306     for (const auto &it : tail_map) {
307       lldb::addr_t id = it.first;
308       const std::unordered_set<lldb::addr_t> successor_set = it.second;
309       // ID of 0 represents an error - errors can't be heads or tails
310       if (id && successor_set.size() > 1)
311         tails.insert(id);
312     }
313 
314     // Need to keep track of size of string since things we push are variable
315     // length
316     size_t superblock_size = 0;
317     // Each super block always has the same first unit (we call this the
318     // super block head) This gurantee allows us to use the super block head as
319     // the unique key mapping to the super block it begins
320     std::optional<size_t> superblock_head;
321     auto construct_next_layer = [&](size_t merge_start, size_t n) -> void {
322       if (!superblock_head)
323         return;
324       if (new_block_layer->GetBlockById(*superblock_head)) {
325         new_block_layer->AppendRepeatedBlock(*superblock_head);
326       } else {
327         HTRBlock new_block = layer.MergeUnits(merge_start, n);
328         new_block_layer->AppendNewBlock(*superblock_head, std::move(new_block));
329       }
330     };
331 
332     for (size_t i = 0; i < num_units; i++) {
333       lldb::addr_t unit_id =
334           layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
335       auto isHead = heads.count(unit_id) > 0;
336       auto isTail = tails.count(unit_id) > 0;
337 
338       if (isHead && isTail) {
339         // Head logic
340         if (superblock_size) { // this handles (tail, head) adjacency -
341                                // otherwise an empty
342                                // block is created
343           // End previous super block
344           construct_next_layer(i - superblock_size, superblock_size);
345         }
346         // Current id is first in next super block since it's a head
347         superblock_head = unit_id;
348         superblock_size = 1;
349 
350         // Tail logic
351         construct_next_layer(i - superblock_size + 1, superblock_size);
352         // Reset the block_head since the prev super block has come to and end
353         superblock_head = std::nullopt;
354         superblock_size = 0;
355       } else if (isHead) {
356         if (superblock_size) { // this handles (tail, head) adjacency -
357                                // otherwise an empty
358                                // block is created
359           // End previous super block
360           construct_next_layer(i - superblock_size, superblock_size);
361         }
362         // Current id is first in next super block since it's a head
363         superblock_head = unit_id;
364         superblock_size = 1;
365       } else if (isTail) {
366         if (!superblock_head)
367           superblock_head = unit_id;
368         superblock_size++;
369 
370         // End previous super block
371         construct_next_layer(i - superblock_size + 1, superblock_size);
372         // Reset the block_head since the prev super block has come to and end
373         superblock_head = std::nullopt;
374         superblock_size = 0;
375       } else {
376         if (!superblock_head)
377           superblock_head = unit_id;
378         superblock_size++;
379       }
380     }
381   }
382   return new_block_layer;
383 }
384 
385 llvm::json::Value lldb_private::toJSON(const TraceHTR &htr) {
386   std::vector<llvm::json::Value> layers_as_json;
387   for (size_t i = 0; i < htr.GetInstructionLayer().GetInstructionTrace().size();
388        i++) {
389     size_t layer_id = htr.GetInstructionLayer().GetLayerId();
390     HTRBlockMetadata metadata = htr.GetInstructionLayer().GetMetadataByIndex(i);
391     lldb::addr_t load_address = metadata.GetFirstInstructionLoadAddress();
392 
393     std::string display_name;
394 
395     std::stringstream stream;
396     stream << "0x" << std::hex << load_address;
397     std::string load_address_hex_string(stream.str());
398     display_name.assign(load_address_hex_string);
399 
400     // name: load address of the first instruction of the block and the name
401     // of the most frequently called function from the block (if applicable)
402 
403     // ph: the event type - 'X' for Complete events (see link to documentation
404     // below)
405 
406     // Since trace timestamps aren't yet supported in HTR, the ts (timestamp) is
407     // based on the instruction's offset in the trace and the dur (duration) is
408     // 1 since this layer contains single instructions. Using the instruction
409     // offset and a duration of 1 oversimplifies the true timing information of
410     // the trace, nonetheless, these approximate timestamps/durations provide an
411     // clear visualization of the trace.
412 
413     // ts: offset from the beginning of the trace for the first instruction in
414     // the block
415 
416     // dur: 1 since this layer contains single instructions.
417 
418     // pid: the ID of the HTR layer the blocks belong to
419 
420     // See
421     // https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#heading=h.j75x71ritcoy
422     // for documentation on the Trace Event Format
423     layers_as_json.emplace_back(llvm::json::Object{
424         {"name", display_name},
425         {"ph", "X"},
426         {"ts", (int64_t)i},
427         {"dur", 1},
428         {"pid", (int64_t)layer_id},
429     });
430   }
431 
432   for (const auto &layer : htr.GetBlockLayers()) {
433     size_t start_ts = 0;
434     std::vector<size_t> block_id_trace = layer->GetBlockIdTrace();
435     for (size_t i = 0; i < block_id_trace.size(); i++) {
436       size_t id = block_id_trace[i];
437       // Guranteed that this ID is valid, so safe to dereference here.
438       HTRBlock block = *layer->GetBlockById(id);
439       llvm::json::Value block_json = toJSON(block);
440       size_t layer_id = layer->GetLayerId();
441 
442       HTRBlockMetadata metadata = block.GetMetadata();
443 
444       std::optional<llvm::StringRef> most_freq_func =
445           metadata.GetMostFrequentlyCalledFunction();
446       std::stringstream stream;
447       stream << "0x" << std::hex << metadata.GetFirstInstructionLoadAddress();
448       std::string offset_hex_string(stream.str());
449       std::string display_name =
450           most_freq_func ? offset_hex_string + ": " + most_freq_func->str()
451                          : offset_hex_string;
452 
453       // Since trace timestamps aren't yet supported in HTR, the ts (timestamp)
454       // and dur (duration) are based on the block's offset in the trace and
455       // number of instructions in the block, respectively. Using the block
456       // offset and the number of instructions oversimplifies the true timing
457       // information of the trace, nonetheless, these approximate
458       // timestamps/durations provide an understandable visualization of the
459       // trace.
460       auto duration = metadata.GetNumInstructions();
461       layers_as_json.emplace_back(llvm::json::Object{
462           {"name", display_name},
463           {"ph", "X"},
464           {"ts", (int64_t)start_ts},
465           {"dur", (int64_t)duration},
466           {"pid", (int64_t)layer_id},
467           {"args", block_json},
468       });
469       start_ts += duration;
470     }
471   }
472   return layers_as_json;
473 }
474 
475 llvm::json::Value lldb_private::toJSON(const HTRBlock &block) {
476   return llvm::json::Value(
477       llvm::json::Object{{"Metadata", block.GetMetadata()}});
478 }
479 
480 llvm::json::Value lldb_private::toJSON(const HTRBlockMetadata &metadata) {
481   std::vector<llvm::json::Value> function_calls;
482   for (const auto &it : metadata.GetFunctionCalls()) {
483     ConstString name = it.first;
484     size_t n_calls = it.second;
485     function_calls.emplace_back(llvm::formatv("({0}: {1})", name, n_calls));
486   }
487 
488   return llvm::json::Value(llvm::json::Object{
489       {"Number of Instructions", (ssize_t)metadata.GetNumInstructions()},
490       {"Functions", function_calls}});
491 }
492