1 //===-- PerfContextSwitchDecoder.cpp --======------------------------------===// 2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 3 // See https://llvm.org/LICENSE.txt for license information. 4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 5 // 6 //===----------------------------------------------------------------------===// 7 8 #include "PerfContextSwitchDecoder.h" 9 #include <optional> 10 11 using namespace lldb; 12 using namespace lldb_private; 13 using namespace lldb_private::trace_intel_pt; 14 using namespace llvm; 15 16 /// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on 17 /// non-linux platforms. 18 /// \{ 19 #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) 20 21 #define PERF_RECORD_LOST 2 22 #define PERF_RECORD_THROTTLE 5 23 #define PERF_RECORD_UNTHROTTLE 6 24 #define PERF_RECORD_LOST_SAMPLES 13 25 #define PERF_RECORD_SWITCH_CPU_WIDE 15 26 #define PERF_RECORD_MAX 19 27 28 struct perf_event_header { 29 uint32_t type; 30 uint16_t misc; 31 uint16_t size; 32 33 /// \return 34 /// An \a llvm::Error if the record looks obviously wrong, or \a 35 /// llvm::Error::success() otherwise. 36 Error SanityCheck() const { 37 // The following checks are based on visual inspection of the records and 38 // enums in 39 // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h 40 // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records 41 // hold. 42 43 // A record of too many uint64_t's or more should mean that the data is 44 // wrong 45 const uint64_t max_valid_size_bytes = 8000; 46 if (size == 0 || size > max_valid_size_bytes) 47 return createStringError( 48 inconvertibleErrorCode(), 49 formatv("A record of {0} bytes was found.", size)); 50 51 // We add some numbers to PERF_RECORD_MAX because some systems might have 52 // custom records. In any case, we are looking only for abnormal data. 53 if (type >= PERF_RECORD_MAX + 100) 54 return createStringError( 55 inconvertibleErrorCode(), 56 formatv("Invalid record type {0} was found.", type)); 57 return Error::success(); 58 } 59 60 bool IsContextSwitchRecord() const { 61 return type == PERF_RECORD_SWITCH_CPU_WIDE; 62 } 63 64 bool IsErrorRecord() const { 65 return type == PERF_RECORD_LOST || type == PERF_RECORD_THROTTLE || 66 type == PERF_RECORD_UNTHROTTLE || type == PERF_RECORD_LOST_SAMPLES; 67 } 68 }; 69 /// \} 70 71 /// Record found in the perf_event context switch traces. It might contain 72 /// additional fields in memory, but header.size should have the actual size 73 /// of the record. 74 struct PerfContextSwitchRecord { 75 struct perf_event_header header; 76 uint32_t next_prev_pid; 77 uint32_t next_prev_tid; 78 uint32_t pid, tid; 79 uint64_t time_in_nanos; 80 81 bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } 82 }; 83 84 /// Record produced after parsing the raw context switch trace produce by 85 /// perf_event. A major difference between this struct and 86 /// PerfContextSwitchRecord is that this one uses tsc instead of nanos. 87 struct ContextSwitchRecord { 88 uint64_t tsc; 89 /// Whether the switch is in or out 90 bool is_out; 91 /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally 92 /// runs after a context switch out of a normal user thread. 93 lldb::pid_t pid; 94 lldb::tid_t tid; 95 96 bool IsOut() const { return is_out; } 97 98 bool IsIn() const { return !is_out; } 99 }; 100 101 uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const { 102 switch (variant) { 103 case Variant::Complete: 104 return tscs.complete.start; 105 case Variant::OnlyStart: 106 return tscs.only_start.start; 107 case Variant::OnlyEnd: 108 return tscs.only_end.end; 109 case Variant::HintedEnd: 110 return tscs.hinted_end.start; 111 case Variant::HintedStart: 112 return tscs.hinted_start.end; 113 } 114 } 115 116 uint64_t ThreadContinuousExecution::GetStartTSC() const { 117 switch (variant) { 118 case Variant::Complete: 119 return tscs.complete.start; 120 case Variant::OnlyStart: 121 return tscs.only_start.start; 122 case Variant::OnlyEnd: 123 return 0; 124 case Variant::HintedEnd: 125 return tscs.hinted_end.start; 126 case Variant::HintedStart: 127 return tscs.hinted_start.hinted_start; 128 } 129 } 130 131 uint64_t ThreadContinuousExecution::GetEndTSC() const { 132 switch (variant) { 133 case Variant::Complete: 134 return tscs.complete.end; 135 case Variant::OnlyStart: 136 return std::numeric_limits<uint64_t>::max(); 137 case Variant::OnlyEnd: 138 return tscs.only_end.end; 139 case Variant::HintedEnd: 140 return tscs.hinted_end.hinted_end; 141 case Variant::HintedStart: 142 return tscs.hinted_start.end; 143 } 144 } 145 146 ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution( 147 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, 148 uint64_t end) { 149 ThreadContinuousExecution o(cpu_id, tid, pid); 150 o.variant = Variant::Complete; 151 o.tscs.complete.start = start; 152 o.tscs.complete.end = end; 153 return o; 154 } 155 156 ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution( 157 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, 158 uint64_t hinted_start, uint64_t end) { 159 ThreadContinuousExecution o(cpu_id, tid, pid); 160 o.variant = Variant::HintedStart; 161 o.tscs.hinted_start.hinted_start = hinted_start; 162 o.tscs.hinted_start.end = end; 163 return o; 164 } 165 166 ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution( 167 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, 168 uint64_t hinted_end) { 169 ThreadContinuousExecution o(cpu_id, tid, pid); 170 o.variant = Variant::HintedEnd; 171 o.tscs.hinted_end.start = start; 172 o.tscs.hinted_end.hinted_end = hinted_end; 173 return o; 174 } 175 176 ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution( 177 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) { 178 ThreadContinuousExecution o(cpu_id, tid, pid); 179 o.variant = Variant::OnlyEnd; 180 o.tscs.only_end.end = end; 181 return o; 182 } 183 184 ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution( 185 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) { 186 ThreadContinuousExecution o(cpu_id, tid, pid); 187 o.variant = Variant::OnlyStart; 188 o.tscs.only_start.start = start; 189 return o; 190 } 191 192 static Error RecoverExecutionsFromConsecutiveRecords( 193 cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion, 194 const ContextSwitchRecord ¤t_record, 195 const std::optional<ContextSwitchRecord> &prev_record, 196 std::function<void(const ThreadContinuousExecution &execution)> 197 on_new_execution) { 198 if (!prev_record) { 199 if (current_record.IsOut()) { 200 on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution( 201 cpu_id, current_record.tid, current_record.pid, current_record.tsc)); 202 } 203 // The 'in' case will be handled later when we try to look for its end 204 return Error::success(); 205 } 206 207 const ContextSwitchRecord &prev = *prev_record; 208 if (prev.tsc >= current_record.tsc) 209 return createStringError( 210 inconvertibleErrorCode(), 211 formatv("A context switch record doesn't happen after the previous " 212 "record. Previous TSC= {0}, current TSC = {1}.", 213 prev.tsc, current_record.tsc)); 214 215 if (current_record.IsIn() && prev.IsIn()) { 216 // We found two consecutive ins, which means that we didn't capture 217 // the end of the previous execution. 218 on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( 219 cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); 220 } else if (current_record.IsOut() && prev.IsOut()) { 221 // We found two consecutive outs, that means that we didn't capture 222 // the beginning of the current execution. 223 on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( 224 cpu_id, current_record.tid, current_record.pid, prev.tsc + 1, 225 current_record.tsc)); 226 } else if (current_record.IsOut() && prev.IsIn()) { 227 if (current_record.pid == prev.pid && current_record.tid == prev.tid) { 228 /// A complete execution 229 on_new_execution(ThreadContinuousExecution::CreateCompleteExecution( 230 cpu_id, current_record.tid, current_record.pid, prev.tsc, 231 current_record.tsc)); 232 } else { 233 // An out after the in of a different thread. The first one doesn't 234 // have an end, and the second one doesn't have a start. 235 on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( 236 cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); 237 on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( 238 cpu_id, current_record.tid, current_record.pid, prev.tsc + 1, 239 current_record.tsc)); 240 } 241 } 242 return Error::success(); 243 } 244 245 Expected<std::vector<ThreadContinuousExecution>> 246 lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace( 247 ArrayRef<uint8_t> data, cpu_id_t cpu_id, 248 const LinuxPerfZeroTscConversion &tsc_conversion) { 249 250 std::vector<ThreadContinuousExecution> executions; 251 252 // This offset is used to create the error message in case of failures. 253 size_t offset = 0; 254 255 auto do_decode = [&]() -> Error { 256 std::optional<ContextSwitchRecord> prev_record; 257 while (offset < data.size()) { 258 const perf_event_header &perf_record = 259 *reinterpret_cast<const perf_event_header *>(data.data() + offset); 260 if (Error err = perf_record.SanityCheck()) 261 return err; 262 263 if (perf_record.IsContextSwitchRecord()) { 264 const PerfContextSwitchRecord &context_switch_record = 265 *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + 266 offset); 267 ContextSwitchRecord record{ 268 tsc_conversion.ToTSC(context_switch_record.time_in_nanos), 269 context_switch_record.IsOut(), 270 static_cast<lldb::pid_t>(context_switch_record.pid), 271 static_cast<lldb::tid_t>(context_switch_record.tid)}; 272 273 if (Error err = RecoverExecutionsFromConsecutiveRecords( 274 cpu_id, tsc_conversion, record, prev_record, 275 [&](const ThreadContinuousExecution &execution) { 276 executions.push_back(execution); 277 })) 278 return err; 279 280 prev_record = record; 281 } 282 offset += perf_record.size; 283 } 284 285 // We might have an incomplete last record 286 if (prev_record && prev_record->IsIn()) 287 executions.push_back(ThreadContinuousExecution::CreateOnlyStartExecution( 288 cpu_id, prev_record->tid, prev_record->pid, prev_record->tsc)); 289 return Error::success(); 290 }; 291 292 if (Error err = do_decode()) 293 return createStringError(inconvertibleErrorCode(), 294 formatv("Malformed perf context switch trace for " 295 "cpu {0} at offset {1}. {2}", 296 cpu_id, offset, toString(std::move(err)))); 297 298 return executions; 299 } 300 301 Expected<std::vector<uint8_t>> 302 lldb_private::trace_intel_pt::FilterProcessesFromContextSwitchTrace( 303 llvm::ArrayRef<uint8_t> data, const std::set<lldb::pid_t> &pids) { 304 size_t offset = 0; 305 std::vector<uint8_t> out_data; 306 307 while (offset < data.size()) { 308 const perf_event_header &perf_record = 309 *reinterpret_cast<const perf_event_header *>(data.data() + offset); 310 if (Error err = perf_record.SanityCheck()) 311 return std::move(err); 312 bool should_copy = false; 313 if (perf_record.IsContextSwitchRecord()) { 314 const PerfContextSwitchRecord &context_switch_record = 315 *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + 316 offset); 317 if (pids.count(context_switch_record.pid)) 318 should_copy = true; 319 } else if (perf_record.IsErrorRecord()) { 320 should_copy = true; 321 } 322 323 if (should_copy) { 324 for (size_t i = 0; i < perf_record.size; i++) { 325 out_data.push_back(data[offset + i]); 326 } 327 } 328 329 offset += perf_record.size; 330 } 331 return out_data; 332 } 333