1 //===-- PerfContextSwitchDecoder.cpp --======------------------------------===// 2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 3 // See https://llvm.org/LICENSE.txt for license information. 4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 5 // 6 //===----------------------------------------------------------------------===// 7 8 #include "PerfContextSwitchDecoder.h" 9 10 using namespace lldb; 11 using namespace lldb_private; 12 using namespace lldb_private::trace_intel_pt; 13 using namespace llvm; 14 15 /// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on 16 /// non-linux platforms. 17 /// \{ 18 #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) 19 20 #define PERF_RECORD_LOST 2 21 #define PERF_RECORD_THROTTLE 5 22 #define PERF_RECORD_UNTHROTTLE 6 23 #define PERF_RECORD_LOST_SAMPLES 13 24 #define PERF_RECORD_SWITCH_CPU_WIDE 15 25 #define PERF_RECORD_MAX 19 26 27 struct perf_event_header { 28 uint32_t type; 29 uint16_t misc; 30 uint16_t size; 31 32 /// \return 33 /// An \a llvm::Error if the record looks obviously wrong, or \a 34 /// llvm::Error::success() otherwise. 35 Error SanityCheck() const { 36 // The following checks are based on visual inspection of the records and 37 // enums in 38 // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h 39 // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records 40 // hold. 41 42 // A record of too many uint64_t's or more should mean that the data is 43 // wrong 44 const uint64_t max_valid_size_bytes = 8000; 45 if (size == 0 || size > max_valid_size_bytes) 46 return createStringError( 47 inconvertibleErrorCode(), 48 formatv("A record of {0} bytes was found.", size)); 49 50 // We add some numbers to PERF_RECORD_MAX because some systems might have 51 // custom records. In any case, we are looking only for abnormal data. 52 if (type >= PERF_RECORD_MAX + 100) 53 return createStringError( 54 inconvertibleErrorCode(), 55 formatv("Invalid record type {0} was found.", type)); 56 return Error::success(); 57 } 58 59 bool IsContextSwitchRecord() const { 60 return type == PERF_RECORD_SWITCH_CPU_WIDE; 61 } 62 63 bool IsErrorRecord() const { 64 return type == PERF_RECORD_LOST || type == PERF_RECORD_THROTTLE || 65 type == PERF_RECORD_UNTHROTTLE || type == PERF_RECORD_LOST_SAMPLES; 66 } 67 }; 68 /// \} 69 70 /// Record found in the perf_event context switch traces. It might contain 71 /// additional fields in memory, but header.size should have the actual size 72 /// of the record. 73 struct PerfContextSwitchRecord { 74 struct perf_event_header header; 75 uint32_t next_prev_pid; 76 uint32_t next_prev_tid; 77 uint32_t pid, tid; 78 uint64_t time_in_nanos; 79 80 bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } 81 }; 82 83 /// Record produced after parsing the raw context switch trace produce by 84 /// perf_event. A major difference between this struct and 85 /// PerfContextSwitchRecord is that this one uses tsc instead of nanos. 86 struct ContextSwitchRecord { 87 uint64_t tsc; 88 /// Whether the switch is in or out 89 bool is_out; 90 /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally 91 /// runs after a context switch out of a normal user thread. 92 lldb::pid_t pid; 93 lldb::tid_t tid; 94 95 bool IsOut() const { return is_out; } 96 97 bool IsIn() const { return !is_out; } 98 }; 99 100 uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const { 101 switch (variant) { 102 case Variant::Complete: 103 return tscs.complete.start; 104 case Variant::OnlyStart: 105 return tscs.only_start.start; 106 case Variant::OnlyEnd: 107 return tscs.only_end.end; 108 case Variant::HintedEnd: 109 return tscs.hinted_end.start; 110 case Variant::HintedStart: 111 return tscs.hinted_start.end; 112 } 113 } 114 115 uint64_t ThreadContinuousExecution::GetStartTSC() const { 116 switch (variant) { 117 case Variant::Complete: 118 return tscs.complete.start; 119 case Variant::OnlyStart: 120 return tscs.only_start.start; 121 case Variant::OnlyEnd: 122 return 0; 123 case Variant::HintedEnd: 124 return tscs.hinted_end.start; 125 case Variant::HintedStart: 126 return tscs.hinted_start.hinted_start; 127 } 128 } 129 130 uint64_t ThreadContinuousExecution::GetEndTSC() const { 131 switch (variant) { 132 case Variant::Complete: 133 return tscs.complete.end; 134 case Variant::OnlyStart: 135 return std::numeric_limits<uint64_t>::max(); 136 case Variant::OnlyEnd: 137 return tscs.only_end.end; 138 case Variant::HintedEnd: 139 return tscs.hinted_end.hinted_end; 140 case Variant::HintedStart: 141 return tscs.hinted_start.end; 142 } 143 } 144 145 ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution( 146 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, 147 uint64_t end) { 148 ThreadContinuousExecution o(cpu_id, tid, pid); 149 o.variant = Variant::Complete; 150 o.tscs.complete.start = start; 151 o.tscs.complete.end = end; 152 return o; 153 } 154 155 ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution( 156 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, 157 uint64_t hinted_start, uint64_t end) { 158 ThreadContinuousExecution o(cpu_id, tid, pid); 159 o.variant = Variant::HintedStart; 160 o.tscs.hinted_start.hinted_start = hinted_start; 161 o.tscs.hinted_start.end = end; 162 return o; 163 } 164 165 ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution( 166 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, 167 uint64_t hinted_end) { 168 ThreadContinuousExecution o(cpu_id, tid, pid); 169 o.variant = Variant::HintedEnd; 170 o.tscs.hinted_end.start = start; 171 o.tscs.hinted_end.hinted_end = hinted_end; 172 return o; 173 } 174 175 ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution( 176 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) { 177 ThreadContinuousExecution o(cpu_id, tid, pid); 178 o.variant = Variant::OnlyEnd; 179 o.tscs.only_end.end = end; 180 return o; 181 } 182 183 ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution( 184 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) { 185 ThreadContinuousExecution o(cpu_id, tid, pid); 186 o.variant = Variant::OnlyStart; 187 o.tscs.only_start.start = start; 188 return o; 189 } 190 191 static Error RecoverExecutionsFromConsecutiveRecords( 192 cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion, 193 const ContextSwitchRecord ¤t_record, 194 const Optional<ContextSwitchRecord> &prev_record, 195 std::function<void(const ThreadContinuousExecution &execution)> 196 on_new_execution) { 197 if (!prev_record) { 198 if (current_record.IsOut()) { 199 on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution( 200 cpu_id, current_record.tid, current_record.pid, current_record.tsc)); 201 } 202 // The 'in' case will be handled later when we try to look for its end 203 return Error::success(); 204 } 205 206 const ContextSwitchRecord &prev = *prev_record; 207 if (prev.tsc >= current_record.tsc) 208 return createStringError( 209 inconvertibleErrorCode(), 210 formatv("A context switch record doesn't happen after the previous " 211 "record. Previous TSC= {0}, current TSC = {1}.", 212 prev.tsc, current_record.tsc)); 213 214 if (current_record.IsIn() && prev.IsIn()) { 215 // We found two consecutive ins, which means that we didn't capture 216 // the end of the previous execution. 217 on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( 218 cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); 219 } else if (current_record.IsOut() && prev.IsOut()) { 220 // We found two consecutive outs, that means that we didn't capture 221 // the beginning of the current execution. 222 on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( 223 cpu_id, current_record.tid, current_record.pid, prev.tsc + 1, 224 current_record.tsc)); 225 } else if (current_record.IsOut() && prev.IsIn()) { 226 if (current_record.pid == prev.pid && current_record.tid == prev.tid) { 227 /// A complete execution 228 on_new_execution(ThreadContinuousExecution::CreateCompleteExecution( 229 cpu_id, current_record.tid, current_record.pid, prev.tsc, 230 current_record.tsc)); 231 } else { 232 // An out after the in of a different thread. The first one doesn't 233 // have an end, and the second one doesn't have a start. 234 on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( 235 cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); 236 on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( 237 cpu_id, current_record.tid, current_record.pid, prev.tsc + 1, 238 current_record.tsc)); 239 } 240 } 241 return Error::success(); 242 } 243 244 Expected<std::vector<ThreadContinuousExecution>> 245 lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace( 246 ArrayRef<uint8_t> data, cpu_id_t cpu_id, 247 const LinuxPerfZeroTscConversion &tsc_conversion) { 248 249 std::vector<ThreadContinuousExecution> executions; 250 251 // This offset is used to create the error message in case of failures. 252 size_t offset = 0; 253 254 auto do_decode = [&]() -> Error { 255 Optional<ContextSwitchRecord> prev_record; 256 while (offset < data.size()) { 257 const perf_event_header &perf_record = 258 *reinterpret_cast<const perf_event_header *>(data.data() + offset); 259 if (Error err = perf_record.SanityCheck()) 260 return err; 261 262 if (perf_record.IsContextSwitchRecord()) { 263 const PerfContextSwitchRecord &context_switch_record = 264 *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + 265 offset); 266 ContextSwitchRecord record{ 267 tsc_conversion.ToTSC(context_switch_record.time_in_nanos), 268 context_switch_record.IsOut(), 269 static_cast<lldb::pid_t>(context_switch_record.pid), 270 static_cast<lldb::tid_t>(context_switch_record.tid)}; 271 272 if (Error err = RecoverExecutionsFromConsecutiveRecords( 273 cpu_id, tsc_conversion, record, prev_record, 274 [&](const ThreadContinuousExecution &execution) { 275 executions.push_back(execution); 276 })) 277 return err; 278 279 prev_record = record; 280 } 281 offset += perf_record.size; 282 } 283 284 // We might have an incomplete last record 285 if (prev_record && prev_record->IsIn()) 286 executions.push_back(ThreadContinuousExecution::CreateOnlyStartExecution( 287 cpu_id, prev_record->tid, prev_record->pid, prev_record->tsc)); 288 return Error::success(); 289 }; 290 291 if (Error err = do_decode()) 292 return createStringError(inconvertibleErrorCode(), 293 formatv("Malformed perf context switch trace for " 294 "cpu {0} at offset {1}. {2}", 295 cpu_id, offset, toString(std::move(err)))); 296 297 return executions; 298 } 299 300 Expected<std::vector<uint8_t>> 301 lldb_private::trace_intel_pt::FilterProcessesFromContextSwitchTrace( 302 llvm::ArrayRef<uint8_t> data, const std::set<lldb::pid_t> &pids) { 303 size_t offset = 0; 304 std::vector<uint8_t> out_data; 305 306 while (offset < data.size()) { 307 const perf_event_header &perf_record = 308 *reinterpret_cast<const perf_event_header *>(data.data() + offset); 309 if (Error err = perf_record.SanityCheck()) 310 return std::move(err); 311 bool should_copy = false; 312 if (perf_record.IsContextSwitchRecord()) { 313 const PerfContextSwitchRecord &context_switch_record = 314 *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + 315 offset); 316 if (pids.count(context_switch_record.pid)) 317 should_copy = true; 318 } else if (perf_record.IsErrorRecord()) { 319 should_copy = true; 320 } 321 322 if (should_copy) { 323 for (size_t i = 0; i < perf_record.size; i++) { 324 out_data.push_back(data[offset + i]); 325 } 326 } 327 328 offset += perf_record.size; 329 } 330 return out_data; 331 } 332