1 //===-- TraceIntelPTMultiCpuDecoder.cpp -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "TraceIntelPTMultiCpuDecoder.h"
10 #include "TraceIntelPT.h"
11 #include "llvm/Support/Error.h"
12 #include <optional>
13 
14 using namespace lldb;
15 using namespace lldb_private;
16 using namespace lldb_private::trace_intel_pt;
17 using namespace llvm;
18 
19 TraceIntelPTMultiCpuDecoder::TraceIntelPTMultiCpuDecoder(
20     TraceIntelPTSP trace_sp)
21     : m_trace_wp(trace_sp) {
22   for (Process *proc : trace_sp->GetAllProcesses()) {
23     for (ThreadSP thread_sp : proc->GetThreadList().Threads()) {
24       m_tids.insert(thread_sp->GetID());
25     }
26   }
27 }
28 
29 TraceIntelPTSP TraceIntelPTMultiCpuDecoder::GetTrace() {
30   return m_trace_wp.lock();
31 }
32 
33 bool TraceIntelPTMultiCpuDecoder::TracesThread(lldb::tid_t tid) const {
34   return m_tids.count(tid);
35 }
36 
37 Expected<std::optional<uint64_t>> TraceIntelPTMultiCpuDecoder::FindLowestTSC() {
38   std::optional<uint64_t> lowest_tsc;
39   TraceIntelPTSP trace_sp = GetTrace();
40 
41   Error err = GetTrace()->OnAllCpusBinaryDataRead(
42       IntelPTDataKinds::kIptTrace,
43       [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error {
44         for (auto &cpu_id_to_buffer : buffers) {
45           Expected<std::optional<uint64_t>> tsc =
46               FindLowestTSCInTrace(*trace_sp, cpu_id_to_buffer.second);
47           if (!tsc)
48             return tsc.takeError();
49           if (*tsc && (!lowest_tsc || *lowest_tsc > **tsc))
50             lowest_tsc = **tsc;
51         }
52         return Error::success();
53       });
54   if (err)
55     return std::move(err);
56   return lowest_tsc;
57 }
58 
59 Expected<DecodedThreadSP> TraceIntelPTMultiCpuDecoder::Decode(Thread &thread) {
60   if (Error err = CorrelateContextSwitchesAndIntelPtTraces())
61     return std::move(err);
62 
63   TraceIntelPTSP trace_sp = GetTrace();
64 
65   return trace_sp->GetThreadTimer(thread.GetID())
66       .TimeTask("Decoding instructions", [&]() -> Expected<DecodedThreadSP> {
67         auto it = m_decoded_threads.find(thread.GetID());
68         if (it != m_decoded_threads.end())
69           return it->second;
70 
71         DecodedThreadSP decoded_thread_sp = std::make_shared<DecodedThread>(
72             thread.shared_from_this(), trace_sp->GetPerfZeroTscConversion());
73 
74         Error err = trace_sp->OnAllCpusBinaryDataRead(
75             IntelPTDataKinds::kIptTrace,
76             [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error {
77               auto it =
78                   m_continuous_executions_per_thread->find(thread.GetID());
79               if (it != m_continuous_executions_per_thread->end())
80                 return DecodeSystemWideTraceForThread(
81                     *decoded_thread_sp, *trace_sp, buffers, it->second);
82 
83               return Error::success();
84             });
85         if (err)
86           return std::move(err);
87 
88         m_decoded_threads.try_emplace(thread.GetID(), decoded_thread_sp);
89         return decoded_thread_sp;
90       });
91 }
92 
93 static Expected<std::vector<PSBBlock>> GetPSBBlocksForCPU(TraceIntelPT &trace,
94                                                           cpu_id_t cpu_id) {
95   std::vector<PSBBlock> psb_blocks;
96   Error err = trace.OnCpuBinaryDataRead(
97       cpu_id, IntelPTDataKinds::kIptTrace,
98       [&](ArrayRef<uint8_t> data) -> Error {
99         Expected<std::vector<PSBBlock>> split_trace =
100             SplitTraceIntoPSBBlock(trace, data, /*expect_tscs=*/true);
101         if (!split_trace)
102           return split_trace.takeError();
103 
104         psb_blocks = std::move(*split_trace);
105         return Error::success();
106       });
107   if (err)
108     return std::move(err);
109   return psb_blocks;
110 }
111 
112 Expected<DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>>
113 TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() {
114   DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>
115       continuous_executions_per_thread;
116   TraceIntelPTSP trace_sp = GetTrace();
117 
118   std::optional<LinuxPerfZeroTscConversion> conv_opt =
119       trace_sp->GetPerfZeroTscConversion();
120   if (!conv_opt)
121     return createStringError(
122         inconvertibleErrorCode(),
123         "TSC to nanoseconds conversion values were not found");
124 
125   LinuxPerfZeroTscConversion tsc_conversion = *conv_opt;
126 
127   for (cpu_id_t cpu_id : trace_sp->GetTracedCpus()) {
128     Expected<std::vector<PSBBlock>> psb_blocks =
129         GetPSBBlocksForCPU(*trace_sp, cpu_id);
130     if (!psb_blocks)
131       return psb_blocks.takeError();
132 
133     m_total_psb_blocks += psb_blocks->size();
134     // We'll be iterating through the thread continuous executions and the intel
135     // pt subtraces sorted by time.
136     auto it = psb_blocks->begin();
137     auto on_new_thread_execution =
138         [&](const ThreadContinuousExecution &thread_execution) {
139           IntelPTThreadContinousExecution execution(thread_execution);
140 
141           for (; it != psb_blocks->end() &&
142                  *it->tsc < thread_execution.GetEndTSC();
143                it++) {
144             if (*it->tsc > thread_execution.GetStartTSC()) {
145               execution.psb_blocks.push_back(*it);
146             } else {
147               m_unattributed_psb_blocks++;
148             }
149           }
150           continuous_executions_per_thread[thread_execution.tid].push_back(
151               execution);
152         };
153     Error err = trace_sp->OnCpuBinaryDataRead(
154         cpu_id, IntelPTDataKinds::kPerfContextSwitchTrace,
155         [&](ArrayRef<uint8_t> data) -> Error {
156           Expected<std::vector<ThreadContinuousExecution>> executions =
157               DecodePerfContextSwitchTrace(data, cpu_id, tsc_conversion);
158           if (!executions)
159             return executions.takeError();
160           for (const ThreadContinuousExecution &exec : *executions)
161             on_new_thread_execution(exec);
162           return Error::success();
163         });
164     if (err)
165       return std::move(err);
166 
167     m_unattributed_psb_blocks += psb_blocks->end() - it;
168   }
169   // We now sort the executions of each thread to have them ready for
170   // instruction decoding
171   for (auto &tid_executions : continuous_executions_per_thread)
172     std::sort(tid_executions.second.begin(), tid_executions.second.end());
173 
174   return continuous_executions_per_thread;
175 }
176 
177 Error TraceIntelPTMultiCpuDecoder::CorrelateContextSwitchesAndIntelPtTraces() {
178   if (m_setup_error)
179     return createStringError(inconvertibleErrorCode(), m_setup_error->c_str());
180 
181   if (m_continuous_executions_per_thread)
182     return Error::success();
183 
184   Error err = GetTrace()->GetGlobalTimer().TimeTask(
185       "Context switch and Intel PT traces correlation", [&]() -> Error {
186         if (auto correlation = DoCorrelateContextSwitchesAndIntelPtTraces()) {
187           m_continuous_executions_per_thread.emplace(std::move(*correlation));
188           return Error::success();
189         } else {
190           return correlation.takeError();
191         }
192       });
193   if (err) {
194     m_setup_error = toString(std::move(err));
195     return createStringError(inconvertibleErrorCode(), m_setup_error->c_str());
196   }
197   return Error::success();
198 }
199 
200 size_t TraceIntelPTMultiCpuDecoder::GetNumContinuousExecutionsForThread(
201     lldb::tid_t tid) const {
202   if (!m_continuous_executions_per_thread)
203     return 0;
204   auto it = m_continuous_executions_per_thread->find(tid);
205   if (it == m_continuous_executions_per_thread->end())
206     return 0;
207   return it->second.size();
208 }
209 
210 size_t TraceIntelPTMultiCpuDecoder::GetTotalContinuousExecutionsCount() const {
211   if (!m_continuous_executions_per_thread)
212     return 0;
213   size_t count = 0;
214   for (const auto &kv : *m_continuous_executions_per_thread)
215     count += kv.second.size();
216   return count;
217 }
218 
219 size_t
220 TraceIntelPTMultiCpuDecoder::GePSBBlocksCountForThread(lldb::tid_t tid) const {
221   if (!m_continuous_executions_per_thread)
222     return 0;
223   size_t count = 0;
224   auto it = m_continuous_executions_per_thread->find(tid);
225   if (it == m_continuous_executions_per_thread->end())
226     return 0;
227   for (const IntelPTThreadContinousExecution &execution : it->second)
228     count += execution.psb_blocks.size();
229   return count;
230 }
231 
232 size_t TraceIntelPTMultiCpuDecoder::GetUnattributedPSBBlocksCount() const {
233   return m_unattributed_psb_blocks;
234 }
235 
236 size_t TraceIntelPTMultiCpuDecoder::GetTotalPSBBlocksCount() const {
237   return m_total_psb_blocks;
238 }
239