//===-- PerfContextSwitchDecoder.cpp --------------------------------------===//
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7 
#include "PerfContextSwitchDecoder.h"

#include "llvm/Support/ErrorHandling.h"
9 
10 using namespace lldb;
11 using namespace lldb_private;
12 using namespace lldb_private::trace_intel_pt;
13 using namespace llvm;
14 
/// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on
/// non-linux platforms.
/// \{
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
#define PERF_RECORD_MAX 19
#define PERF_RECORD_SWITCH_CPU_WIDE 15

/// Mirrors the binary layout of the header that precedes every perf_event
/// record. Instances are obtained by reinterpret_cast'ing raw trace bytes, so
/// the field layout must not be changed.
struct perf_event_header {
  uint32_t type;
  uint16_t misc;
  uint16_t size;

  /// Check this header for values that are obviously corrupt.
  ///
  /// \return
  ///   An \a llvm::Error if the record looks obviously wrong, or \a
  ///   llvm::Error::success() otherwise.
  Error SanityCheck() const {
    // The following checks are based on visual inspection of the records and
    // enums in
    // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h
    // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records
    // hold.

    // A size of 0 would make the decoding loop stall, and a record of ~1000
    // uint64_t's or more is far bigger than any record we expect, so either
    // should mean that the data is wrong.
    const uint64_t max_valid_size_bytes = 8000;
    if (size == 0 || size > max_valid_size_bytes)
      return createStringError(
          inconvertibleErrorCode(),
          formatv("A record of {0} bytes was found.", size));

    // We add some numbers to PERF_RECORD_MAX because some systems might have
    // custom records. In any case, we are looking only for abnormal data.
    if (type >= PERF_RECORD_MAX + 100)
      return createStringError(
          inconvertibleErrorCode(),
          formatv("Invalid record type {0} was found.", type));
    return Error::success();
  }

  /// \return
  ///   \b true if this record is a CPU-wide context switch record, the only
  ///   record type this decoder consumes.
  bool IsContextSwitchRecord() const {
    return type == PERF_RECORD_SWITCH_CPU_WIDE;
  }
};
/// \}
59 
/// Record found in the perf_event context switch traces. It might contain
/// additional fields in memory, but header.size should have the actual size
/// of the record. Like \a perf_event_header, this mirrors the kernel's binary
/// layout and is produced by reinterpret_cast'ing raw trace bytes, so the
/// field layout must not be changed.
struct PerfContextSwitchRecord {
  struct perf_event_header header;
  uint32_t next_prev_pid; // presumably the pid on the other side of the
                          // switch — confirm against perf_event.h
  uint32_t next_prev_tid; // presumably the tid on the other side of the switch
  uint32_t pid, tid;      // Thread this record refers to.
  uint64_t time_in_nanos; // Wall-clock timestamp in nanoseconds (not TSC).

  // PERF_RECORD_MISC_SWITCH_OUT in \a header.misc distinguishes a switch-out
  // from a switch-in.
  bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; }
};
72 
/// Record produced after parsing the raw context switch trace produced by
/// perf_event. A major difference between this struct and
/// PerfContextSwitchRecord is that this one uses tsc instead of nanos.
struct ContextSwitchRecord {
  /// Timestamp converted from nanoseconds to TSC.
  uint64_t tsc;
  /// Whether the switch is in or out
  bool is_out;
  /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally
  /// runs after a context switch out of a normal user thread.
  lldb::pid_t pid;
  lldb::tid_t tid;

  /// \return \b true if this record marks the end of an execution.
  bool IsOut() const { return is_out; }

  /// \return \b true if this record marks the beginning of an execution.
  bool IsIn() const { return !is_out; }
};
89 
90 uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const {
91   switch (variant) {
92   case Variant::Complete:
93     return tscs.complete.start;
94   case Variant::OnlyStart:
95     return tscs.only_start.start;
96   case Variant::OnlyEnd:
97     return tscs.only_end.end;
98   case Variant::HintedEnd:
99     return tscs.hinted_end.start;
100   case Variant::HintedStart:
101     return tscs.hinted_start.end;
102   }
103 }
104 
105 uint64_t ThreadContinuousExecution::GetStartTSC() const {
106   switch (variant) {
107   case Variant::Complete:
108     return tscs.complete.start;
109   case Variant::OnlyStart:
110     return tscs.only_start.start;
111   case Variant::OnlyEnd:
112     return 0;
113   case Variant::HintedEnd:
114     return tscs.hinted_end.start;
115   case Variant::HintedStart:
116     return tscs.hinted_start.hinted_start;
117   }
118 }
119 
120 uint64_t ThreadContinuousExecution::GetEndTSC() const {
121   switch (variant) {
122   case Variant::Complete:
123     return tscs.complete.end;
124   case Variant::OnlyStart:
125     return std::numeric_limits<uint64_t>::max();
126   case Variant::OnlyEnd:
127     return tscs.only_end.end;
128   case Variant::HintedEnd:
129     return tscs.hinted_end.hinted_end;
130   case Variant::HintedStart:
131     return tscs.hinted_start.end;
132   }
133 }
134 
135 ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution(
136     lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start,
137     uint64_t end) {
138   ThreadContinuousExecution o(cpu_id, tid, pid);
139   o.variant = Variant::Complete;
140   o.tscs.complete.start = start;
141   o.tscs.complete.end = end;
142   return o;
143 }
144 
145 ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution(
146     lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid,
147     uint64_t hinted_start, uint64_t end) {
148   ThreadContinuousExecution o(cpu_id, tid, pid);
149   o.variant = Variant::HintedStart;
150   o.tscs.hinted_start.hinted_start = hinted_start;
151   o.tscs.hinted_start.end = end;
152   return o;
153 }
154 
155 ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution(
156     lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start,
157     uint64_t hinted_end) {
158   ThreadContinuousExecution o(cpu_id, tid, pid);
159   o.variant = Variant::HintedEnd;
160   o.tscs.hinted_end.start = start;
161   o.tscs.hinted_end.hinted_end = hinted_end;
162   return o;
163 }
164 
165 ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution(
166     lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) {
167   ThreadContinuousExecution o(cpu_id, tid, pid);
168   o.variant = Variant::OnlyEnd;
169   o.tscs.only_end.end = end;
170   return o;
171 }
172 
173 ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution(
174     lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) {
175   ThreadContinuousExecution o(cpu_id, tid, pid);
176   o.variant = Variant::OnlyStart;
177   o.tscs.only_start.start = start;
178   return o;
179 }
180 
181 static Error RecoverExecutionsFromConsecutiveRecords(
182     cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion,
183     const ContextSwitchRecord &current_record,
184     const Optional<ContextSwitchRecord> &prev_record,
185     std::function<void(const ThreadContinuousExecution &execution)>
186         on_new_execution) {
187   if (!prev_record) {
188     if (current_record.IsOut()) {
189       on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution(
190           cpu_id, current_record.tid, current_record.pid, current_record.tsc));
191     }
192     // The 'in' case will be handled later when we try to look for its end
193     return Error::success();
194   }
195 
196   const ContextSwitchRecord &prev = *prev_record;
197   if (prev.tsc >= current_record.tsc)
198     return createStringError(
199         inconvertibleErrorCode(),
200         formatv("A context switch record doesn't happen after the previous "
201                 "record. Previous TSC= {0}, current TSC = {1}.",
202                 prev.tsc, current_record.tsc));
203 
204   if (current_record.IsIn() && prev.IsIn()) {
205     // We found two consecutive ins, which means that we didn't capture
206     // the end of the previous execution.
207     on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution(
208         cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1));
209   } else if (current_record.IsOut() && prev.IsOut()) {
210     // We found two consecutive outs, that means that we didn't capture
211     // the beginning of the current execution.
212     on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution(
213         cpu_id, current_record.tid, current_record.pid, prev.tsc + 1,
214         current_record.tsc));
215   } else if (current_record.IsOut() && prev.IsIn()) {
216     if (current_record.pid == prev.pid && current_record.tid == prev.tid) {
217       /// A complete execution
218       on_new_execution(ThreadContinuousExecution::CreateCompleteExecution(
219           cpu_id, current_record.tid, current_record.pid, prev.tsc,
220           current_record.tsc));
221     } else {
222       // An out after the in of a different thread. The first one doesn't
223       // have an end, and the second one doesn't have a start.
224       on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution(
225           cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1));
226       on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution(
227           cpu_id, current_record.tid, current_record.pid, prev.tsc + 1,
228           current_record.tsc));
229     }
230   }
231   return Error::success();
232 }
233 
234 Expected<std::vector<ThreadContinuousExecution>>
235 lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace(
236     ArrayRef<uint8_t> data, cpu_id_t cpu_id,
237     const LinuxPerfZeroTscConversion &tsc_conversion) {
238 
239   std::vector<ThreadContinuousExecution> executions;
240 
241   // This offset is used to create the error message in case of failures.
242   size_t offset = 0;
243 
244   auto do_decode = [&]() -> Error {
245     Optional<ContextSwitchRecord> prev_record;
246     while (offset < data.size()) {
247       const perf_event_header &perf_record =
248           *reinterpret_cast<const perf_event_header *>(data.data() + offset);
249       if (Error err = perf_record.SanityCheck())
250         return err;
251 
252       if (perf_record.IsContextSwitchRecord()) {
253         const PerfContextSwitchRecord &context_switch_record =
254             *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() +
255                                                                offset);
256         ContextSwitchRecord record{
257             tsc_conversion.ToTSC(context_switch_record.time_in_nanos),
258             context_switch_record.IsOut(),
259             static_cast<lldb::pid_t>(context_switch_record.pid),
260             static_cast<lldb::tid_t>(context_switch_record.tid)};
261 
262         if (Error err = RecoverExecutionsFromConsecutiveRecords(
263                 cpu_id, tsc_conversion, record, prev_record,
264                 [&](const ThreadContinuousExecution &execution) {
265                   executions.push_back(execution);
266                 }))
267           return err;
268 
269         prev_record = record;
270       }
271       offset += perf_record.size;
272     }
273 
274     // We might have an incomplete last record
275     if (prev_record && prev_record->IsIn())
276       executions.push_back(ThreadContinuousExecution::CreateOnlyStartExecution(
277           cpu_id, prev_record->tid, prev_record->pid, prev_record->tsc));
278     return Error::success();
279   };
280 
281   if (Error err = do_decode())
282     return createStringError(inconvertibleErrorCode(),
283                              formatv("Malformed perf context switch trace for "
284                                      "cpu {0} at offset {1}. {2}",
285                                      cpu_id, offset, toString(std::move(err))));
286 
287   return executions;
288 }
289