1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/importers/ftrace/sched_event_tracker.h"
18 
19 #include <math.h>
20 
21 #include "perfetto/ext/base/utils.h"
22 #include "src/trace_processor/importers/common/args_tracker.h"
23 #include "src/trace_processor/importers/common/event_tracker.h"
24 #include "src/trace_processor/importers/common/process_tracker.h"
25 #include "src/trace_processor/importers/common/system_info_tracker.h"
26 #include "src/trace_processor/importers/ftrace/ftrace_descriptors.h"
27 #include "src/trace_processor/storage/stats.h"
28 #include "src/trace_processor/types/task_state.h"
29 #include "src/trace_processor/types/trace_processor_context.h"
30 #include "src/trace_processor/types/variadic.h"
31 
32 #include "protos/perfetto/trace/ftrace/ftrace_event.pbzero.h"
33 #include "protos/perfetto/trace/ftrace/sched.pbzero.h"
34 
35 namespace perfetto {
36 namespace trace_processor {
37 
SchedEventTracker(TraceProcessorContext * context)38 SchedEventTracker::SchedEventTracker(TraceProcessorContext* context)
39     : context_(context) {
40   // pre-parse sched_switch
41   auto* switch_descriptor = GetMessageDescriptorForId(
42       protos::pbzero::FtraceEvent::kSchedSwitchFieldNumber);
43   PERFETTO_CHECK(switch_descriptor->max_field_id == kSchedSwitchMaxFieldId);
44 
45   for (size_t i = 1; i <= kSchedSwitchMaxFieldId; i++) {
46     sched_switch_field_ids_[i] =
47         context->storage->InternString(switch_descriptor->fields[i].name);
48   }
49   sched_switch_id_ = context->storage->InternString(switch_descriptor->name);
50 
51   // pre-parse sched_waking
52   auto* waking_descriptor = GetMessageDescriptorForId(
53       protos::pbzero::FtraceEvent::kSchedWakingFieldNumber);
54   PERFETTO_CHECK(waking_descriptor->max_field_id == kSchedWakingMaxFieldId);
55 
56   for (size_t i = 1; i <= kSchedWakingMaxFieldId; i++) {
57     sched_waking_field_ids_[i] =
58         context->storage->InternString(waking_descriptor->fields[i].name);
59   }
60   sched_waking_id_ = context->storage->InternString(waking_descriptor->name);
61 }
62 
63 SchedEventTracker::~SchedEventTracker() = default;
64 
PushSchedSwitch(uint32_t cpu,int64_t ts,uint32_t prev_pid,base::StringView prev_comm,int32_t prev_prio,int64_t prev_state,uint32_t next_pid,base::StringView next_comm,int32_t next_prio)65 void SchedEventTracker::PushSchedSwitch(uint32_t cpu,
66                                         int64_t ts,
67                                         uint32_t prev_pid,
68                                         base::StringView prev_comm,
69                                         int32_t prev_prio,
70                                         int64_t prev_state,
71                                         uint32_t next_pid,
72                                         base::StringView next_comm,
73                                         int32_t next_prio) {
74   // At this stage all events should be globally timestamp ordered.
75   if (ts < context_->event_tracker->max_timestamp()) {
76     PERFETTO_ELOG(
77         "sched_switch event out of order by %.4f ms, skipping",
78         static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
79             1e6);
80     context_->storage->IncrementStats(stats::sched_switch_out_of_order);
81     return;
82   }
83   context_->event_tracker->UpdateMaxTimestamp(ts);
84   PERFETTO_DCHECK(cpu < kMaxCpus);
85 
86   StringId next_comm_id = context_->storage->InternString(next_comm);
87   UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
88       next_pid, next_comm_id, ThreadNamePriority::kFtrace);
89 
90   // First use this data to close the previous slice.
91   bool prev_pid_match_prev_next_pid = false;
92   auto* pending_sched = &pending_sched_per_cpu_[cpu];
93   uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
94   if (pending_slice_idx < std::numeric_limits<uint32_t>::max()) {
95     prev_pid_match_prev_next_pid = prev_pid == pending_sched->last_pid;
96     if (PERFETTO_LIKELY(prev_pid_match_prev_next_pid)) {
97       ClosePendingSlice(pending_slice_idx, ts, prev_state);
98     } else {
99       // If the pids are not consistent, make a note of this.
100       context_->storage->IncrementStats(stats::mismatched_sched_switch_tids);
101     }
102   }
103 
104   // We have to intern prev_comm again because our assumption that
105   // this event's |prev_comm| == previous event's |next_comm| does not hold
106   // if the thread changed its name while scheduled.
107   StringId prev_comm_id = context_->storage->InternString(prev_comm);
108   UniqueTid prev_utid = context_->process_tracker->UpdateThreadName(
109       prev_pid, prev_comm_id, ThreadNamePriority::kFtrace);
110 
111   auto new_slice_idx = AddRawEventAndStartSlice(
112       cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
113       next_utid, next_pid, next_comm_id, next_prio);
114 
115   // Finally, update the info for the next sched switch on this CPU.
116   pending_sched->pending_slice_storage_idx = new_slice_idx;
117   pending_sched->last_pid = next_pid;
118   pending_sched->last_utid = next_utid;
119   pending_sched->last_prio = next_prio;
120 }
121 
PushSchedSwitchCompact(uint32_t cpu,int64_t ts,int64_t prev_state,uint32_t next_pid,int32_t next_prio,StringId next_comm_id)122 void SchedEventTracker::PushSchedSwitchCompact(uint32_t cpu,
123                                                int64_t ts,
124                                                int64_t prev_state,
125                                                uint32_t next_pid,
126                                                int32_t next_prio,
127                                                StringId next_comm_id) {
128   // At this stage all events should be globally timestamp ordered.
129   if (ts < context_->event_tracker->max_timestamp()) {
130     PERFETTO_ELOG(
131         "sched_switch event out of order by %.4f ms, skipping",
132         static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
133             1e6);
134     context_->storage->IncrementStats(stats::sched_switch_out_of_order);
135     return;
136   }
137   context_->event_tracker->UpdateMaxTimestamp(ts);
138   PERFETTO_DCHECK(cpu < kMaxCpus);
139 
140   UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
141       next_pid, next_comm_id, ThreadNamePriority::kFtrace);
142 
143   auto* pending_sched = &pending_sched_per_cpu_[cpu];
144 
145   // If we're processing the first compact event for this cpu, don't start a
146   // slice since we're missing the "prev_*" fields. The successive events will
147   // create slices as normal, but the first per-cpu switch is effectively
148   // discarded.
149   if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
150     context_->storage->IncrementStats(stats::compact_sched_switch_skipped);
151 
152     pending_sched->last_pid = next_pid;
153     pending_sched->last_utid = next_utid;
154     pending_sched->last_prio = next_prio;
155     // Note: no pending slice, so leave |pending_slice_storage_idx| in its
156     // invalid state.
157     return;
158   }
159 
160   // Close the pending slice if any (we won't have one when processing the first
161   // two compact events for a given cpu).
162   uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
163   if (pending_slice_idx < std::numeric_limits<uint32_t>::max())
164     ClosePendingSlice(pending_slice_idx, ts, prev_state);
165 
166   // Use the previous event's values to infer this event's "prev_*" fields.
167   // There are edge cases, but this assumption should still produce sensible
168   // results in the absence of data loss.
169   UniqueTid prev_utid = pending_sched->last_utid;
170   uint32_t prev_pid = pending_sched->last_pid;
171   int32_t prev_prio = pending_sched->last_prio;
172 
173   // Do a fresh task name lookup in case it was updated by a task_rename while
174   // scheduled.
175   StringId prev_comm_id = context_->storage->thread_table().name()[prev_utid];
176 
177   auto new_slice_idx = AddRawEventAndStartSlice(
178       cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
179       next_utid, next_pid, next_comm_id, next_prio);
180 
181   // Finally, update the info for the next sched switch on this CPU.
182   pending_sched->pending_slice_storage_idx = new_slice_idx;
183   pending_sched->last_pid = next_pid;
184   pending_sched->last_utid = next_utid;
185   pending_sched->last_prio = next_prio;
186 }
187 
188 PERFETTO_ALWAYS_INLINE
AddRawEventAndStartSlice(uint32_t cpu,int64_t ts,UniqueTid prev_utid,uint32_t prev_pid,StringId prev_comm_id,int32_t prev_prio,int64_t prev_state,UniqueTid next_utid,uint32_t next_pid,StringId next_comm_id,int32_t next_prio)189 uint32_t SchedEventTracker::AddRawEventAndStartSlice(uint32_t cpu,
190                                                      int64_t ts,
191                                                      UniqueTid prev_utid,
192                                                      uint32_t prev_pid,
193                                                      StringId prev_comm_id,
194                                                      int32_t prev_prio,
195                                                      int64_t prev_state,
196                                                      UniqueTid next_utid,
197                                                      uint32_t next_pid,
198                                                      StringId next_comm_id,
199                                                      int32_t next_prio) {
200   if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
201     // Push the raw event - this is done as the raw ftrace event codepath does
202     // not insert sched_switch.
203     RawId id = context_->storage->mutable_raw_table()
204                    ->Insert({ts, sched_switch_id_, cpu, prev_utid})
205                    .id;
206 
207     // Note: this ordering is important. The events should be pushed in the same
208     // order as the order of fields in the proto; this is used by the raw table
209     // to index these events using the field ids.
210     using SS = protos::pbzero::SchedSwitchFtraceEvent;
211 
212     auto inserter = context_->args_tracker->AddArgsTo(id);
213     auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
214       StringId key = sched_switch_field_ids_[static_cast<size_t>(field_num)];
215       inserter.AddArg(key, var);
216     };
217     add_raw_arg(SS::kPrevCommFieldNumber, Variadic::String(prev_comm_id));
218     add_raw_arg(SS::kPrevPidFieldNumber, Variadic::Integer(prev_pid));
219     add_raw_arg(SS::kPrevPrioFieldNumber, Variadic::Integer(prev_prio));
220     add_raw_arg(SS::kPrevStateFieldNumber, Variadic::Integer(prev_state));
221     add_raw_arg(SS::kNextCommFieldNumber, Variadic::String(next_comm_id));
222     add_raw_arg(SS::kNextPidFieldNumber, Variadic::Integer(next_pid));
223     add_raw_arg(SS::kNextPrioFieldNumber, Variadic::Integer(next_prio));
224   }
225 
226   // Open a new scheduling slice, corresponding to the task that was
227   // just switched to.
228   auto* sched = context_->storage->mutable_sched_slice_table();
229   auto row_and_id = sched->Insert(
230       {ts, 0 /* duration */, cpu, next_utid, kNullStringId, next_prio});
231   SchedId sched_id = row_and_id.id;
232   return *sched->id().IndexOf(sched_id);
233 }
234 
235 PERFETTO_ALWAYS_INLINE
ClosePendingSlice(uint32_t pending_slice_idx,int64_t ts,int64_t prev_state)236 void SchedEventTracker::ClosePendingSlice(uint32_t pending_slice_idx,
237                                           int64_t ts,
238                                           int64_t prev_state) {
239   auto* slices = context_->storage->mutable_sched_slice_table();
240 
241   int64_t duration = ts - slices->ts()[pending_slice_idx];
242   slices->mutable_dur()->Set(pending_slice_idx, duration);
243 
244   // We store the state as a uint16 as we only consider values up to 2048
245   // when unpacking the information inside; this allows savings of 48 bits
246   // per slice.
247   auto kernel_version =
248       SystemInfoTracker::GetOrCreate(context_)->GetKernelVersion();
249   auto task_state = ftrace_utils::TaskState(static_cast<uint16_t>(prev_state),
250                                             kernel_version);
251   if (!task_state.is_valid()) {
252     context_->storage->IncrementStats(stats::task_state_invalid);
253   }
254   auto id = task_state.is_valid()
255                 ? context_->storage->InternString(task_state.ToString().data())
256                 : kNullStringId;
257   slices->mutable_end_state()->Set(pending_slice_idx, id);
258 }
259 
260 // Processes a sched_waking that was decoded from a compact representation,
261 // adding to the raw and instants tables.
PushSchedWakingCompact(uint32_t cpu,int64_t ts,uint32_t wakee_pid,int32_t target_cpu,int32_t prio,StringId comm_id)262 void SchedEventTracker::PushSchedWakingCompact(uint32_t cpu,
263                                                int64_t ts,
264                                                uint32_t wakee_pid,
265                                                int32_t target_cpu,
266                                                int32_t prio,
267                                                StringId comm_id) {
268   // At this stage all events should be globally timestamp ordered.
269   if (ts < context_->event_tracker->max_timestamp()) {
270     PERFETTO_ELOG(
271         "sched_waking event out of order by %.4f ms, skipping",
272         static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
273             1e6);
274     context_->storage->IncrementStats(stats::sched_waking_out_of_order);
275     return;
276   }
277   context_->event_tracker->UpdateMaxTimestamp(ts);
278   PERFETTO_DCHECK(cpu < kMaxCpus);
279 
280   // We infer the task that emitted the event (i.e. common_pid) from the
281   // scheduling slices. Drop the event if we haven't seen any sched_switch
282   // events for this cpu yet.
283   // Note that if sched_switch wasn't enabled, we will have to skip all
284   // compact waking events.
285   auto* pending_sched = &pending_sched_per_cpu_[cpu];
286   if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
287     context_->storage->IncrementStats(stats::compact_sched_waking_skipped);
288     return;
289   }
290   auto curr_utid = pending_sched->last_utid;
291 
292   if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
293     // Add an entry to the raw table.
294     RawId id = context_->storage->mutable_raw_table()
295                    ->Insert({ts, sched_waking_id_, cpu, curr_utid})
296                    .id;
297 
298     // "success" is hardcoded as always 1 by the kernel, with a TODO to remove
299     // it.
300     static constexpr int32_t kHardcodedSuccess = 1;
301 
302     using SW = protos::pbzero::SchedWakingFtraceEvent;
303     auto inserter = context_->args_tracker->AddArgsTo(id);
304     auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
305       StringId key = sched_waking_field_ids_[static_cast<size_t>(field_num)];
306       inserter.AddArg(key, var);
307     };
308     add_raw_arg(SW::kCommFieldNumber, Variadic::String(comm_id));
309     add_raw_arg(SW::kPidFieldNumber, Variadic::Integer(wakee_pid));
310     add_raw_arg(SW::kPrioFieldNumber, Variadic::Integer(prio));
311     add_raw_arg(SW::kSuccessFieldNumber, Variadic::Integer(kHardcodedSuccess));
312     add_raw_arg(SW::kTargetCpuFieldNumber, Variadic::Integer(target_cpu));
313   }
314 
315   // Add a waking entry to the instants.
316   auto wakee_utid = context_->process_tracker->GetOrCreateThread(wakee_pid);
317   auto* instants = context_->storage->mutable_instant_table();
318   auto ref_type_id = context_->storage->InternString(
319       GetRefTypeStringMap()[static_cast<size_t>(RefType::kRefUtid)]);
320   instants->Insert({ts, sched_waking_id_, wakee_utid, ref_type_id});
321 }
322 
FlushPendingEvents()323 void SchedEventTracker::FlushPendingEvents() {
324   // TODO(lalitm): the day this method is called before end of trace, don't
325   // flush the sched events as they will probably be pushed in the next round
326   // of ftrace events.
327   int64_t end_ts = context_->storage->GetTraceTimestampBoundsNs().second;
328   auto* slices = context_->storage->mutable_sched_slice_table();
329   for (const auto& pending_sched : pending_sched_per_cpu_) {
330     uint32_t row = pending_sched.pending_slice_storage_idx;
331     if (row == std::numeric_limits<uint32_t>::max())
332       continue;
333 
334     int64_t duration = end_ts - slices->ts()[row];
335     slices->mutable_dur()->Set(row, duration);
336 
337     auto state = ftrace_utils::TaskState(ftrace_utils::TaskState::kRunnable);
338     auto id = context_->storage->InternString(state.ToString().data());
339     slices->mutable_end_state()->Set(row, id);
340   }
341 
342   pending_sched_per_cpu_ = {};
343 }
344 
345 }  // namespace trace_processor
346 }  // namespace perfetto
347