1 //===-- PerfContextSwitchDecoder.cpp --======------------------------------===// 2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 3 // See https://llvm.org/LICENSE.txt for license information. 4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 5 // 6 //===----------------------------------------------------------------------===// 7 8 #include "PerfContextSwitchDecoder.h" 9 10 using namespace lldb; 11 using namespace lldb_private; 12 using namespace lldb_private::trace_intel_pt; 13 using namespace llvm; 14 15 /// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on 16 /// non-linux platforms. 17 /// \{ 18 #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) 19 #define PERF_RECORD_MAX 19 20 #define PERF_RECORD_SWITCH_CPU_WIDE 15 21 22 struct perf_event_header { 23 uint32_t type; 24 uint16_t misc; 25 uint16_t size; 26 27 /// \return 28 /// An \a llvm::Error if the record looks obviously wrong, or \a 29 /// llvm::Error::success() otherwise. 30 Error SanityCheck() const { 31 // The following checks are based on visual inspection of the records and 32 // enums in 33 // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h 34 // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records 35 // hold. 36 37 // A record of too many uint64_t's or more should mean that the data is 38 // wrong 39 const uint64_t max_valid_size_bytes = 8000; 40 if (size == 0 || size > max_valid_size_bytes) 41 return createStringError( 42 inconvertibleErrorCode(), 43 formatv("A record of {0} bytes was found.", size)); 44 45 // We add some numbers to PERF_RECORD_MAX because some systems might have 46 // custom records. In any case, we are looking only for abnormal data. 47 if (type >= PERF_RECORD_MAX + 100) 48 return createStringError( 49 inconvertibleErrorCode(), 50 formatv("Invalid record type {0} was found.", type)); 51 return Error::success(); 52 } 53 54 bool IsContextSwitchRecord() const { 55 return type == PERF_RECORD_SWITCH_CPU_WIDE; 56 } 57 }; 58 /// \} 59 60 /// Record found in the perf_event context switch traces. It might contain 61 /// additional fields in memory, but header.size should have the actual size 62 /// of the record. 63 struct PerfContextSwitchRecord { 64 struct perf_event_header header; 65 uint32_t next_prev_pid; 66 uint32_t next_prev_tid; 67 uint32_t pid, tid; 68 uint64_t time_in_nanos; 69 70 bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } 71 }; 72 73 /// Record produced after parsing the raw context switch trace produce by 74 /// perf_event. A major difference between this struct and 75 /// PerfContextSwitchRecord is that this one uses tsc instead of nanos. 76 struct ContextSwitchRecord { 77 uint64_t tsc; 78 /// Whether the switch is in or out 79 bool is_out; 80 /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally 81 /// runs after a context switch out of a normal user thread. 82 lldb::pid_t pid; 83 lldb::tid_t tid; 84 85 bool IsOut() const { return is_out; } 86 87 bool IsIn() const { return !is_out; } 88 }; 89 90 uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const { 91 switch (variant) { 92 case Variant::Complete: 93 return tscs.complete.start; 94 case Variant::OnlyStart: 95 return tscs.only_start.start; 96 case Variant::OnlyEnd: 97 return tscs.only_end.end; 98 case Variant::HintedEnd: 99 return tscs.hinted_end.start; 100 case Variant::HintedStart: 101 return tscs.hinted_start.end; 102 } 103 } 104 105 uint64_t ThreadContinuousExecution::GetStartTSC() const { 106 switch (variant) { 107 case Variant::Complete: 108 return tscs.complete.start; 109 case Variant::OnlyStart: 110 return tscs.only_start.start; 111 case Variant::OnlyEnd: 112 return 0; 113 case Variant::HintedEnd: 114 return tscs.hinted_end.start; 115 case Variant::HintedStart: 116 return tscs.hinted_start.hinted_start; 117 } 118 } 119 120 uint64_t ThreadContinuousExecution::GetEndTSC() const { 121 switch (variant) { 122 case Variant::Complete: 123 return tscs.complete.end; 124 case Variant::OnlyStart: 125 return std::numeric_limits<uint64_t>::max(); 126 case Variant::OnlyEnd: 127 return tscs.only_end.end; 128 case Variant::HintedEnd: 129 return tscs.hinted_end.hinted_end; 130 case Variant::HintedStart: 131 return tscs.hinted_start.end; 132 } 133 } 134 135 ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution( 136 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, 137 uint64_t end) { 138 ThreadContinuousExecution o(cpu_id, tid, pid); 139 o.variant = Variant::Complete; 140 o.tscs.complete.start = start; 141 o.tscs.complete.end = end; 142 return o; 143 } 144 145 ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution( 146 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, 147 uint64_t hinted_start, uint64_t end) { 148 ThreadContinuousExecution o(cpu_id, tid, pid); 149 o.variant = Variant::HintedStart; 150 o.tscs.hinted_start.hinted_start = hinted_start; 151 o.tscs.hinted_start.end = end; 152 return o; 153 } 154 155 ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution( 156 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, 157 uint64_t hinted_end) { 158 ThreadContinuousExecution o(cpu_id, tid, pid); 159 o.variant = Variant::HintedEnd; 160 o.tscs.hinted_end.start = start; 161 o.tscs.hinted_end.hinted_end = hinted_end; 162 return o; 163 } 164 165 ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution( 166 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) { 167 ThreadContinuousExecution o(cpu_id, tid, pid); 168 o.variant = Variant::OnlyEnd; 169 o.tscs.only_end.end = end; 170 return o; 171 } 172 173 ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution( 174 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) { 175 ThreadContinuousExecution o(cpu_id, tid, pid); 176 o.variant = Variant::OnlyStart; 177 o.tscs.only_start.start = start; 178 return o; 179 } 180 181 static Error RecoverExecutionsFromConsecutiveRecords( 182 cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion, 183 const ContextSwitchRecord ¤t_record, 184 const Optional<ContextSwitchRecord> &prev_record, 185 std::function<void(const ThreadContinuousExecution &execution)> 186 on_new_execution) { 187 if (!prev_record) { 188 if (current_record.IsOut()) { 189 on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution( 190 cpu_id, current_record.tid, current_record.pid, current_record.tsc)); 191 } 192 // The 'in' case will be handled later when we try to look for its end 193 return Error::success(); 194 } 195 196 const ContextSwitchRecord &prev = *prev_record; 197 if (prev.tsc >= current_record.tsc) 198 return createStringError( 199 inconvertibleErrorCode(), 200 formatv("A context switch record doesn't happen after the previous " 201 "record. Previous TSC= {0}, current TSC = {1}.", 202 prev.tsc, current_record.tsc)); 203 204 if (current_record.IsIn() && prev.IsIn()) { 205 // We found two consecutive ins, which means that we didn't capture 206 // the end of the previous execution. 207 on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( 208 cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); 209 } else if (current_record.IsOut() && prev.IsOut()) { 210 // We found two consecutive outs, that means that we didn't capture 211 // the beginning of the current execution. 212 on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( 213 cpu_id, current_record.tid, current_record.pid, prev.tsc + 1, 214 current_record.tsc)); 215 } else if (current_record.IsOut() && prev.IsIn()) { 216 if (current_record.pid == prev.pid && current_record.tid == prev.tid) { 217 /// A complete execution 218 on_new_execution(ThreadContinuousExecution::CreateCompleteExecution( 219 cpu_id, current_record.tid, current_record.pid, prev.tsc, 220 current_record.tsc)); 221 } else { 222 // An out after the in of a different thread. The first one doesn't 223 // have an end, and the second one doesn't have a start. 224 on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( 225 cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); 226 on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( 227 cpu_id, current_record.tid, current_record.pid, prev.tsc + 1, 228 current_record.tsc)); 229 } 230 } 231 return Error::success(); 232 } 233 234 Expected<std::vector<ThreadContinuousExecution>> 235 lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace( 236 ArrayRef<uint8_t> data, cpu_id_t cpu_id, 237 const LinuxPerfZeroTscConversion &tsc_conversion) { 238 239 std::vector<ThreadContinuousExecution> executions; 240 241 // This offset is used to create the error message in case of failures. 242 size_t offset = 0; 243 244 auto do_decode = [&]() -> Error { 245 Optional<ContextSwitchRecord> prev_record; 246 while (offset < data.size()) { 247 const perf_event_header &perf_record = 248 *reinterpret_cast<const perf_event_header *>(data.data() + offset); 249 if (Error err = perf_record.SanityCheck()) 250 return err; 251 252 if (perf_record.IsContextSwitchRecord()) { 253 const PerfContextSwitchRecord &context_switch_record = 254 *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() + 255 offset); 256 ContextSwitchRecord record{ 257 tsc_conversion.ToTSC(context_switch_record.time_in_nanos), 258 context_switch_record.IsOut(), 259 static_cast<lldb::pid_t>(context_switch_record.pid), 260 static_cast<lldb::tid_t>(context_switch_record.tid)}; 261 262 if (Error err = RecoverExecutionsFromConsecutiveRecords( 263 cpu_id, tsc_conversion, record, prev_record, 264 [&](const ThreadContinuousExecution &execution) { 265 executions.push_back(execution); 266 })) 267 return err; 268 269 prev_record = record; 270 } 271 offset += perf_record.size; 272 } 273 274 // We might have an incomplete last record 275 if (prev_record && prev_record->IsIn()) 276 executions.push_back(ThreadContinuousExecution::CreateOnlyStartExecution( 277 cpu_id, prev_record->tid, prev_record->pid, prev_record->tsc)); 278 return Error::success(); 279 }; 280 281 if (Error err = do_decode()) 282 return createStringError(inconvertibleErrorCode(), 283 formatv("Malformed perf context switch trace for " 284 "cpu {0} at offset {1}. {2}", 285 cpu_id, offset, toString(std::move(err)))); 286 287 return executions; 288 } 289