1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/traced/probes/ftrace/cpu_reader.h"
18 
19 #include <dirent.h>
20 #include <signal.h>
21 
22 #include <utility>
23 
24 #include "perfetto/base/build_config.h"
25 #include "perfetto/base/logging.h"
26 #include "perfetto/ext/base/metatrace.h"
27 #include "perfetto/ext/base/optional.h"
28 #include "perfetto/ext/base/utils.h"
29 #include "perfetto/ext/tracing/core/trace_writer.h"
30 #include "protos/perfetto/trace/ftrace/ftrace_event.pbzero.h"
31 #include "protos/perfetto/trace/ftrace/ftrace_event_bundle.pbzero.h"
32 #include "protos/perfetto/trace/ftrace/generic.pbzero.h"
33 #include "protos/perfetto/trace/trace_packet.pbzero.h"
34 #include "src/traced/probes/ftrace/ftrace_config_muxer.h"
35 #include "src/traced/probes/ftrace/ftrace_controller.h"
36 #include "src/traced/probes/ftrace/ftrace_data_source.h"
37 #include "src/traced/probes/ftrace/proto_translation_table.h"
38 
39 namespace perfetto {
40 namespace {
41 
// Threshold on the number of unique strings interned by the compact_sched
// buffer. If the buffer accumulates more unique strings than this, the reader
// will flush it to reset the interning state (and make it cheap again).
// This is not an exact cap, since we check only at tracing page boundaries.
// TODO(rsavitski): consider making part of compact_sched config.
constexpr size_t kCompactSchedInternerThreshold = 64;

// Values of the 5-bit |type_or_length| field of a ring buffer event header.
// Anything up to and including kTypeDataTypeLengthMax denotes a data record;
// the remaining three values mark special (non-data) records.
// For further documentation of these constants see the kernel source:
// linux/include/linux/ring_buffer.h
// Some information about the values of these constants is exposed to user
// space at: /sys/kernel/debug/tracing/events/header_event
constexpr uint32_t kTypeDataTypeLengthMax = 28;
constexpr uint32_t kTypePadding = 29;
constexpr uint32_t kTypeTimeExtend = 30;
constexpr uint32_t kTypeTimeStamp = 31;
56 
// Header that precedes every record in the payload of a raw ftrace page. The
// payload parser reads it directly out of the page buffer.
struct EventHeader {
  // Either one of the special kType* record types, or (when <=
  // kTypeDataTypeLengthMax) the length of a data record in 4-byte units.
  // Zero means the record length is stored in the first word of the payload.
  uint32_t type_or_length : 5;
  // Delta that the parser adds to its running timestamp for this record.
  uint32_t time_delta : 27;
};
61 
// Payload of a kTypeTimeStamp record. The parser only reads it in order to
// step over the record; the values themselves are not used.
struct TimeStamp {
  uint64_t tv_nsec;
  uint64_t tv_sec;
};
66 
ReadIntoString(const uint8_t * start,const uint8_t * end,uint32_t field_id,protozero::Message * out)67 bool ReadIntoString(const uint8_t* start,
68                     const uint8_t* end,
69                     uint32_t field_id,
70                     protozero::Message* out) {
71   for (const uint8_t* c = start; c < end; c++) {
72     if (*c != '\0')
73       continue;
74     out->AppendBytes(field_id, reinterpret_cast<const char*>(start),
75                      static_cast<uintptr_t>(c - start));
76     return true;
77   }
78   return false;
79 }
80 
ReadDataLoc(const uint8_t * start,const uint8_t * field_start,const uint8_t * end,const Field & field,protozero::Message * message)81 bool ReadDataLoc(const uint8_t* start,
82                  const uint8_t* field_start,
83                  const uint8_t* end,
84                  const Field& field,
85                  protozero::Message* message) {
86   PERFETTO_DCHECK(field.ftrace_size == 4);
87   // See
88   // https://github.com/torvalds/linux/blob/master/include/trace/trace_events.h
89   uint32_t data = 0;
90   const uint8_t* ptr = field_start;
91   if (!CpuReader::ReadAndAdvance(&ptr, end, &data)) {
92     PERFETTO_DFATAL("Buffer overflowed.");
93     return false;
94   }
95 
96   const uint16_t offset = data & 0xffff;
97   const uint16_t len = (data >> 16) & 0xffff;
98   const uint8_t* const string_start = start + offset;
99   const uint8_t* const string_end = string_start + len;
100   if (string_start <= start || string_end > end) {
101     PERFETTO_DFATAL("Buffer overflowed.");
102     return false;
103   }
104   ReadIntoString(string_start, string_end, field.proto_field_id, message);
105   return true;
106 }
107 
// Returns a |T| assembled from the sizeof(T) bytes at |ptr|. The bytes are
// copied rather than reinterpreted, so |ptr| does not have to be aligned for
// |T|. The caller guarantees that sizeof(T) bytes are readable.
template <typename T>
T ReadValue(const uint8_t* ptr) {
  T value;
  memcpy(&value, ptr, sizeof(value));
  return value;
}
114 
115 // Reads a signed ftrace value as an int64_t, sign extending if necessary.
ReadSignedFtraceValue(const uint8_t * ptr,FtraceFieldType ftrace_type)116 static int64_t ReadSignedFtraceValue(const uint8_t* ptr,
117                                      FtraceFieldType ftrace_type) {
118   if (ftrace_type == kFtraceInt32) {
119     int32_t value;
120     memcpy(&value, reinterpret_cast<const void*>(ptr), sizeof(value));
121     return int64_t(value);
122   }
123   if (ftrace_type == kFtraceInt64) {
124     int64_t value;
125     memcpy(&value, reinterpret_cast<const void*>(ptr), sizeof(value));
126     return value;
127   }
128   PERFETTO_FATAL("unexpected ftrace type");
129 }
130 
// Puts |fd| into blocking (|is_blocking| == true) or non-blocking mode by
// toggling O_NONBLOCK in its file status flags; all other flags are kept.
// Returns true on success, false if the flags could not be read or written.
bool SetBlocking(int fd, bool is_blocking) {
  const int flags = fcntl(fd, F_GETFL, 0);
  // F_GETFL reports failure as -1. Bail out rather than treating the error
  // value as a flag word and writing it back.
  if (flags == -1)
    return false;
  const int new_flags =
      is_blocking ? (flags & ~O_NONBLOCK) : (flags | O_NONBLOCK);
  return fcntl(fd, F_SETFL, new_flags) == 0;
}
136 
137 }  // namespace
138 
139 using protos::pbzero::GenericFtraceEvent;
140 
// Creates a reader for the per-cpu raw ftrace pipe |trace_fd| of cpu |cpu|.
// Takes ownership of |trace_fd|, which must be valid, and switches it to
// non-blocking mode; either precondition failing is fatal. |table| is stored
// as a plain pointer and used later to decode the pages that are read.
CpuReader::CpuReader(size_t cpu,
                     const ProtoTranslationTable* table,
                     base::ScopedFile trace_fd)
    : cpu_(cpu), table_(table), trace_fd_(std::move(trace_fd)) {
  PERFETTO_CHECK(trace_fd_);
  PERFETTO_CHECK(SetBlocking(*trace_fd_, false));
}

CpuReader::~CpuReader() = default;
150 
ReadCycle(uint8_t * parsing_buf,size_t parsing_buf_size_pages,size_t max_pages,const std::set<FtraceDataSource * > & started_data_sources)151 size_t CpuReader::ReadCycle(
152     uint8_t* parsing_buf,
153     size_t parsing_buf_size_pages,
154     size_t max_pages,
155     const std::set<FtraceDataSource*>& started_data_sources) {
156   PERFETTO_DCHECK(max_pages > 0 && parsing_buf_size_pages > 0);
157   metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
158                              metatrace::FTRACE_CPU_READ_CYCLE);
159 
160   // Work in batches to keep cache locality, and limit memory usage.
161   size_t batch_pages = std::min(parsing_buf_size_pages, max_pages);
162   size_t total_pages_read = 0;
163   for (bool is_first_batch = true;; is_first_batch = false) {
164     size_t pages_read = ReadAndProcessBatch(
165         parsing_buf, batch_pages, is_first_batch, started_data_sources);
166 
167     PERFETTO_DCHECK(pages_read <= batch_pages);
168     total_pages_read += pages_read;
169 
170     // Check whether we've caught up to the writer, or possibly giving up on
171     // this attempt due to some error.
172     if (pages_read != batch_pages)
173       break;
174     // Check if we've hit the limit of work for this cycle.
175     if (total_pages_read >= max_pages)
176       break;
177   }
178   PERFETTO_METATRACE_COUNTER(TAG_FTRACE, FTRACE_PAGES_DRAINED,
179                              total_pages_read);
180   return total_pages_read;
181 }
182 
183 // metatrace note: mark the reading phase as FTRACE_CPU_READ_BATCH, but let the
184 // parsing time be implied (by the difference between the caller's span, and
185 // this reading span). Makes it easier to estimate the read/parse ratio when
186 // looking at the trace in the UI.
ReadAndProcessBatch(uint8_t * parsing_buf,size_t max_pages,bool first_batch_in_cycle,const std::set<FtraceDataSource * > & started_data_sources)187 size_t CpuReader::ReadAndProcessBatch(
188     uint8_t* parsing_buf,
189     size_t max_pages,
190     bool first_batch_in_cycle,
191     const std::set<FtraceDataSource*>& started_data_sources) {
192   size_t pages_read = 0;
193   {
194     metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
195                                metatrace::FTRACE_CPU_READ_BATCH);
196     for (; pages_read < max_pages;) {
197       uint8_t* curr_page = parsing_buf + (pages_read * base::kPageSize);
198       ssize_t res =
199           PERFETTO_EINTR(read(*trace_fd_, curr_page, base::kPageSize));
200       if (res < 0) {
201         // Expected errors:
202         // EAGAIN: no data (since we're in non-blocking mode).
203         // ENONMEM, EBUSY: temporary ftrace failures (they happen).
204         if (errno != EAGAIN && errno != ENOMEM && errno != EBUSY)
205           PERFETTO_PLOG("Unexpected error on raw ftrace read");
206         break;  // stop reading regardless of errno
207       }
208 
209       // As long as all of our reads are for a single page, the kernel should
210       // return exactly a well-formed raw ftrace page (if not in the steady
211       // state of reading out fully-written pages, the kernel will construct
212       // pages as necessary, copying over events and zero-filling at the end).
213       // A sub-page read() is therefore not expected in practice (unless
214       // there's a concurrent reader requesting less than a page?). Crash if
215       // encountering this situation. Kernel source pointer: see usage of
216       // |info->read| within |tracing_buffers_read|.
217       if (res == 0) {
218         // Very rare, but possible. Stop for now, should recover.
219         PERFETTO_DLOG("[cpu%zu]: 0-sized read from ftrace pipe.", cpu_);
220         break;
221       }
222       PERFETTO_CHECK(res == static_cast<ssize_t>(base::kPageSize));
223 
224       pages_read += 1;
225 
226       // Compare the amount of ftrace data read against an empirical threshold
227       // to make an educated guess on whether we should read more. To figure
228       // out the amount of ftrace data, we need to parse the page header (since
229       // the read always returns a page, zero-filled at the end). If we read
230       // fewer bytes than the threshold, it means that we caught up with the
231       // write pointer and we started consuming ftrace events in real-time.
232       // This cannot be just 4096 because it needs to account for
233       // fragmentation, i.e. for the fact that the last trace event didn't fit
234       // in the current page and hence the current page was terminated
235       // prematurely.
236       static constexpr size_t kRoughlyAPage = base::kPageSize - 512;
237       const uint8_t* scratch_ptr = curr_page;
238       base::Optional<PageHeader> hdr =
239           ParsePageHeader(&scratch_ptr, table_->page_header_size_len());
240       PERFETTO_DCHECK(hdr && hdr->size > 0 && hdr->size <= base::kPageSize);
241       if (!hdr.has_value()) {
242         PERFETTO_ELOG("[cpu%zu]: can't parse page header", cpu_);
243         break;
244       }
245       // Note that the first read after starting the read cycle being small is
246       // normal. It means that we're given the remainder of events from a
247       // page that we've partially consumed during the last read of the previous
248       // cycle (having caught up to the writer).
249       if (hdr->size < kRoughlyAPage &&
250           !(first_batch_in_cycle && pages_read == 1)) {
251         break;
252       }
253     }
254   }  // end of metatrace::FTRACE_CPU_READ_BATCH
255 
256   // Parse the pages and write to the trace for all relevant data
257   // sources.
258   if (pages_read == 0)
259     return pages_read;
260 
261   for (FtraceDataSource* data_source : started_data_sources) {
262     bool success = ProcessPagesForDataSource(
263         data_source->trace_writer(), data_source->mutable_metadata(), cpu_,
264         data_source->parsing_config(), parsing_buf, pages_read, table_);
265     PERFETTO_CHECK(success);
266   }
267 
268   return pages_read;
269 }
270 
271 // static
ProcessPagesForDataSource(TraceWriter * trace_writer,FtraceMetadata * metadata,size_t cpu,const FtraceDataSourceConfig * ds_config,const uint8_t * parsing_buf,const size_t pages_read,const ProtoTranslationTable * table)272 bool CpuReader::ProcessPagesForDataSource(
273     TraceWriter* trace_writer,
274     FtraceMetadata* metadata,
275     size_t cpu,
276     const FtraceDataSourceConfig* ds_config,
277     const uint8_t* parsing_buf,
278     const size_t pages_read,
279     const ProtoTranslationTable* table) {
280   // Begin an FtraceEventBundle, and allocate the buffer for compact scheduler
281   // events (which will be unused if the compact option isn't enabled).
282   CompactSchedBuffer compact_sched;
283   auto packet = trace_writer->NewTracePacket();
284   auto* bundle = packet->set_ftrace_events();
285 
286   bool compact_sched_enabled = ds_config->compact_sched.enabled;
287 
288   // Note: The fastpath in proto_trace_parser.cc speculates on the fact
289   // that the cpu field is the first field of the proto message. If this
290   // changes, change proto_trace_parser.cc accordingly.
291   bundle->set_cpu(static_cast<uint32_t>(cpu));
292 
293   for (size_t i = 0; i < pages_read; i++) {
294     const uint8_t* curr_page = parsing_buf + (i * base::kPageSize);
295     const uint8_t* curr_page_end = curr_page + base::kPageSize;
296     const uint8_t* parse_pos = curr_page;
297     base::Optional<PageHeader> page_header =
298         ParsePageHeader(&parse_pos, table->page_header_size_len());
299 
300     if (!page_header.has_value() || page_header->size == 0 ||
301         parse_pos >= curr_page_end ||
302         parse_pos + page_header->size > curr_page_end) {
303       PERFETTO_DFATAL("invalid page header");
304       return false;
305     }
306 
307     // Start a new bundle if either:
308     // * The page we're about to read indicates that there was a kernel ring
309     //   buffer overrun since our last read from that per-cpu buffer. We have
310     //   a single |lost_events| field per bundle, so start a new packet.
311     // * The compact_sched buffer is holding more unique interned strings than
312     //   a threshold. We need to flush the compact buffer to make the
313     //   interning lookups cheap again.
314     bool interner_past_threshold =
315         compact_sched_enabled &&
316         compact_sched.interner().interned_comms_size() >
317             kCompactSchedInternerThreshold;
318     if (page_header->lost_events || interner_past_threshold) {
319       if (compact_sched_enabled)
320         compact_sched.WriteAndReset(bundle);
321       packet->Finalize();
322 
323       packet = trace_writer->NewTracePacket();
324       bundle = packet->set_ftrace_events();
325       bundle->set_cpu(static_cast<uint32_t>(cpu));
326       if (page_header->lost_events)
327         bundle->set_lost_events(true);
328     }
329 
330     size_t evt_size =
331         ParsePagePayload(parse_pos, &page_header.value(), table, ds_config,
332                          &compact_sched, bundle, metadata);
333 
334     // TODO(rsavitski): propagate error to trace processor in release builds.
335     // (FtraceMetadata -> FtraceStats in trace).
336     PERFETTO_DCHECK(evt_size == page_header->size);
337   }
338 
339   if (compact_sched_enabled)
340     compact_sched.WriteAndReset(bundle);
341 
342   return true;
343 }
344 
345 // A page header consists of:
346 // * timestamp: 8 bytes
347 // * commit: 8 bytes on 64 bit, 4 bytes on 32 bit kernels
348 //
349 // The kernel reports this at /sys/kernel/debug/tracing/events/header_page.
350 //
351 // |commit|'s bottom bits represent the length of the payload following this
352 // header. The top bits have been repurposed as a bitset of flags pertaining to
353 // data loss. We look only at the "there has been some data lost" flag
354 // (RB_MISSED_EVENTS), and ignore the relatively tricky "appended the precise
355 // lost events count past the end of the valid data, as there was room to do so"
356 // flag (RB_MISSED_STORED).
357 //
358 // static
ParsePageHeader(const uint8_t ** ptr,uint16_t page_header_size_len)359 base::Optional<CpuReader::PageHeader> CpuReader::ParsePageHeader(
360     const uint8_t** ptr,
361     uint16_t page_header_size_len) {
362   // Mask for the data length portion of the |commit| field. Note that the
363   // kernel implementation never explicitly defines the boundary (beyond using
364   // bits 30 and 31 as flags), but 27 bits are mentioned as sufficient in the
365   // original commit message, and is the constant used by trace-cmd.
366   constexpr static uint64_t kDataSizeMask = (1ull << 27) - 1;
367   // If set, indicates that the relevant cpu has lost events since the last read
368   // (clearing the bit internally).
369   constexpr static uint64_t kMissedEventsFlag = (1ull << 31);
370 
371   const uint8_t* end_of_page = *ptr + base::kPageSize;
372   PageHeader page_header;
373   if (!CpuReader::ReadAndAdvance<uint64_t>(ptr, end_of_page,
374                                            &page_header.timestamp))
375     return base::nullopt;
376 
377   uint32_t size_and_flags;
378 
379   // On little endian, we can just read a uint32_t and reject the rest of the
380   // number later.
381   if (!CpuReader::ReadAndAdvance<uint32_t>(
382           ptr, end_of_page, base::AssumeLittleEndian(&size_and_flags)))
383     return base::nullopt;
384 
385   page_header.size = size_and_flags & kDataSizeMask;
386   page_header.lost_events = bool(size_and_flags & kMissedEventsFlag);
387   PERFETTO_DCHECK(page_header.size <= base::kPageSize);
388 
389   // Reject rest of the number, if applicable. On 32-bit, size_bytes - 4 will
390   // evaluate to 0 and this will be a no-op. On 64-bit, this will advance by 4
391   // bytes.
392   PERFETTO_DCHECK(page_header_size_len >= 4);
393   *ptr += page_header_size_len - 4;
394 
395   return base::make_optional(page_header);
396 }
397 
398 // A raw ftrace buffer page consists of a header followed by a sequence of
399 // binary ftrace events. See |ParsePageHeader| for the format of the earlier.
400 //
401 // This method is deliberately static so it can be tested independently.
ParsePagePayload(const uint8_t * start_of_payload,const PageHeader * page_header,const ProtoTranslationTable * table,const FtraceDataSourceConfig * ds_config,CompactSchedBuffer * compact_sched_buffer,FtraceEventBundle * bundle,FtraceMetadata * metadata)402 size_t CpuReader::ParsePagePayload(const uint8_t* start_of_payload,
403                                    const PageHeader* page_header,
404                                    const ProtoTranslationTable* table,
405                                    const FtraceDataSourceConfig* ds_config,
406                                    CompactSchedBuffer* compact_sched_buffer,
407                                    FtraceEventBundle* bundle,
408                                    FtraceMetadata* metadata) {
409   const uint8_t* ptr = start_of_payload;
410   const uint8_t* const end = ptr + page_header->size;
411 
412   uint64_t timestamp = page_header->timestamp;
413 
414   while (ptr < end) {
415     EventHeader event_header;
416     if (!ReadAndAdvance(&ptr, end, &event_header))
417       return 0;
418 
419     timestamp += event_header.time_delta;
420 
421     switch (event_header.type_or_length) {
422       case kTypePadding: {
423         // Left over page padding or discarded event.
424         if (event_header.time_delta == 0) {
425           // Not clear what the correct behaviour is in this case.
426           PERFETTO_DFATAL("Empty padding event.");
427           return 0;
428         }
429         uint32_t length;
430         if (!ReadAndAdvance<uint32_t>(&ptr, end, &length))
431           return 0;
432         // length includes itself (4 bytes)
433         if (length < 4)
434           return 0;
435         ptr += length - 4;
436         break;
437       }
438       case kTypeTimeExtend: {
439         // Extend the time delta.
440         uint32_t time_delta_ext;
441         if (!ReadAndAdvance<uint32_t>(&ptr, end, &time_delta_ext))
442           return 0;
443         // See https://goo.gl/CFBu5x
444         timestamp += (static_cast<uint64_t>(time_delta_ext)) << 27;
445         break;
446       }
447       case kTypeTimeStamp: {
448         // Sync time stamp with external clock.
449         TimeStamp time_stamp;
450         if (!ReadAndAdvance<TimeStamp>(&ptr, end, &time_stamp))
451           return 0;
452         // Not implemented in the kernel, nothing should generate this.
453         PERFETTO_DFATAL("Unimplemented in kernel. Should be unreachable.");
454         break;
455       }
456       // Data record:
457       default: {
458         PERFETTO_CHECK(event_header.type_or_length <= kTypeDataTypeLengthMax);
459         // type_or_length is <=28 so it represents the length of a data
460         // record. if == 0, this is an extended record and the size of the
461         // record is stored in the first uint32_t word in the payload. See
462         // Kernel's include/linux/ring_buffer.h
463         uint32_t event_size;
464         if (event_header.type_or_length == 0) {
465           if (!ReadAndAdvance<uint32_t>(&ptr, end, &event_size))
466             return 0;
467           // Size includes the size field itself.
468           if (event_size < 4)
469             return 0;
470           event_size -= 4;
471         } else {
472           event_size = 4 * event_header.type_or_length;
473         }
474         const uint8_t* start = ptr;
475         const uint8_t* next = ptr + event_size;
476 
477         if (next > end)
478           return 0;
479 
480         uint16_t ftrace_event_id;
481         if (!ReadAndAdvance<uint16_t>(&ptr, end, &ftrace_event_id))
482           return 0;
483 
484         if (ds_config->event_filter.IsEventEnabled(ftrace_event_id)) {
485           // Special-cased handling of some scheduler events when compact format
486           // is enabled.
487           bool compact_sched_enabled = ds_config->compact_sched.enabled;
488           const CompactSchedSwitchFormat& sched_switch_format =
489               table->compact_sched_format().sched_switch;
490           const CompactSchedWakingFormat& sched_waking_format =
491               table->compact_sched_format().sched_waking;
492 
493           // compact sched_switch
494           if (compact_sched_enabled &&
495               ftrace_event_id == sched_switch_format.event_id) {
496             if (event_size < sched_switch_format.size)
497               return 0;
498 
499             ParseSchedSwitchCompact(start, timestamp, &sched_switch_format,
500                                     compact_sched_buffer, metadata);
501 
502             // compact sched_waking
503           } else if (compact_sched_enabled &&
504                      ftrace_event_id == sched_waking_format.event_id) {
505             if (event_size < sched_waking_format.size)
506               return 0;
507 
508             ParseSchedWakingCompact(start, timestamp, &sched_waking_format,
509                                     compact_sched_buffer, metadata);
510 
511           } else {
512             // Common case: parse all other types of enabled events.
513             protos::pbzero::FtraceEvent* event = bundle->add_event();
514             event->set_timestamp(timestamp);
515             if (!ParseEvent(ftrace_event_id, start, next, table, event,
516                             metadata))
517               return 0;
518           }
519         }
520 
521         // Jump to next event.
522         ptr = next;
523       }
524     }
525   }
526   return static_cast<size_t>(ptr - start_of_payload);
527 }
528 
529 // |start| is the start of the current event.
530 // |end| is the end of the buffer.
ParseEvent(uint16_t ftrace_event_id,const uint8_t * start,const uint8_t * end,const ProtoTranslationTable * table,protozero::Message * message,FtraceMetadata * metadata)531 bool CpuReader::ParseEvent(uint16_t ftrace_event_id,
532                            const uint8_t* start,
533                            const uint8_t* end,
534                            const ProtoTranslationTable* table,
535                            protozero::Message* message,
536                            FtraceMetadata* metadata) {
537   PERFETTO_DCHECK(start < end);
538   const size_t length = static_cast<size_t>(end - start);
539 
540   // TODO(hjd): Rework to work even if the event is unknown.
541   const Event& info = *table->GetEventById(ftrace_event_id);
542 
543   // TODO(hjd): Test truncated events.
544   // If the end of the buffer is before the end of the event give up.
545   if (info.size > length) {
546     PERFETTO_DFATAL("Buffer overflowed.");
547     return false;
548   }
549 
550   bool success = true;
551   for (const Field& field : table->common_fields())
552     success &= ParseField(field, start, end, message, metadata);
553 
554   protozero::Message* nested =
555       message->BeginNestedMessage<protozero::Message>(info.proto_field_id);
556 
557   // Parse generic event.
558   if (PERFETTO_UNLIKELY(info.proto_field_id ==
559                         protos::pbzero::FtraceEvent::kGenericFieldNumber)) {
560     nested->AppendString(GenericFtraceEvent::kEventNameFieldNumber, info.name);
561     for (const Field& field : info.fields) {
562       auto generic_field = nested->BeginNestedMessage<protozero::Message>(
563           GenericFtraceEvent::kFieldFieldNumber);
564       // TODO(taylori): Avoid outputting field names every time.
565       generic_field->AppendString(GenericFtraceEvent::Field::kNameFieldNumber,
566                                   field.ftrace_name);
567       success &= ParseField(field, start, end, generic_field, metadata);
568     }
569   } else {  // Parse all other events.
570     for (const Field& field : info.fields) {
571       success &= ParseField(field, start, end, nested, metadata);
572     }
573   }
574 
575   if (PERFETTO_UNLIKELY(info.proto_field_id ==
576                         protos::pbzero::FtraceEvent::kTaskRenameFieldNumber)) {
577     // For task renames, we want to store that the pid was renamed. We use the
578     // common pid to reduce code complexity as in all the cases we care about,
579     // the common pid is the same as the renamed pid (the pid inside the event).
580     PERFETTO_DCHECK(metadata->last_seen_common_pid);
581     metadata->AddRenamePid(metadata->last_seen_common_pid);
582   }
583 
584   // This finalizes |nested| and |proto_field| automatically.
585   message->Finalize();
586   metadata->FinishEvent();
587   return success;
588 }
589 
590 // Caller must guarantee that the field fits in the range,
591 // explicitly: start + field.ftrace_offset + field.ftrace_size <= end
592 // The only exception is fields with strategy = kCStringToString
593 // where the total size isn't known up front. In this case ParseField
594 // will check the string terminates in the bounds and won't read past |end|.
ParseField(const Field & field,const uint8_t * start,const uint8_t * end,protozero::Message * message,FtraceMetadata * metadata)595 bool CpuReader::ParseField(const Field& field,
596                            const uint8_t* start,
597                            const uint8_t* end,
598                            protozero::Message* message,
599                            FtraceMetadata* metadata) {
600   PERFETTO_DCHECK(start + field.ftrace_offset + field.ftrace_size <= end);
601   const uint8_t* field_start = start + field.ftrace_offset;
602   uint32_t field_id = field.proto_field_id;
603 
604   switch (field.strategy) {
605     case kUint8ToUint32:
606     case kUint8ToUint64:
607       ReadIntoVarInt<uint8_t>(field_start, field_id, message);
608       return true;
609     case kUint16ToUint32:
610     case kUint16ToUint64:
611       ReadIntoVarInt<uint16_t>(field_start, field_id, message);
612       return true;
613     case kUint32ToUint32:
614     case kUint32ToUint64:
615       ReadIntoVarInt<uint32_t>(field_start, field_id, message);
616       return true;
617     case kUint64ToUint64:
618       ReadIntoVarInt<uint64_t>(field_start, field_id, message);
619       return true;
620     case kInt8ToInt32:
621     case kInt8ToInt64:
622       ReadIntoVarInt<int8_t>(field_start, field_id, message);
623       return true;
624     case kInt16ToInt32:
625     case kInt16ToInt64:
626       ReadIntoVarInt<int16_t>(field_start, field_id, message);
627       return true;
628     case kInt32ToInt32:
629     case kInt32ToInt64:
630       ReadIntoVarInt<int32_t>(field_start, field_id, message);
631       return true;
632     case kInt64ToInt64:
633       ReadIntoVarInt<int64_t>(field_start, field_id, message);
634       return true;
635     case kFixedCStringToString:
636       // TODO(hjd): Add AppendMaxLength string to protozero.
637       return ReadIntoString(field_start, field_start + field.ftrace_size,
638                             field_id, message);
639     case kCStringToString:
640       // TODO(hjd): Kernel-dive to check this how size:0 char fields work.
641       return ReadIntoString(field_start, end, field.proto_field_id, message);
642     case kStringPtrToString:
643       // TODO(hjd): Figure out how to read these.
644       return true;
645     case kDataLocToString:
646       return ReadDataLoc(start, field_start, end, field, message);
647     case kBoolToUint32:
648     case kBoolToUint64:
649       ReadIntoVarInt<uint8_t>(field_start, field_id, message);
650       return true;
651     case kInode32ToUint64:
652       ReadInode<uint32_t>(field_start, field_id, message, metadata);
653       return true;
654     case kInode64ToUint64:
655       ReadInode<uint64_t>(field_start, field_id, message, metadata);
656       return true;
657     case kPid32ToInt32:
658     case kPid32ToInt64:
659       ReadPid(field_start, field_id, message, metadata);
660       return true;
661     case kCommonPid32ToInt32:
662     case kCommonPid32ToInt64:
663       ReadCommonPid(field_start, field_id, message, metadata);
664       return true;
665     case kDevId32ToUint64:
666       ReadDevId<uint32_t>(field_start, field_id, message, metadata);
667       return true;
668     case kDevId64ToUint64:
669       ReadDevId<uint64_t>(field_start, field_id, message, metadata);
670       return true;
671     case kInvalidTranslationStrategy:
672       break;
673   }
674   PERFETTO_FATAL("Unexpected translation strategy");
675 }
676 
677 // Parse a sched_switch event according to pre-validated format, and buffer the
678 // individual fields in the current compact batch. See the code populating
679 // |CompactSchedSwitchFormat| for the assumptions made around the format, which
680 // this code is closely tied to.
681 // static
ParseSchedSwitchCompact(const uint8_t * start,uint64_t timestamp,const CompactSchedSwitchFormat * format,CompactSchedBuffer * compact_buf,FtraceMetadata * metadata)682 void CpuReader::ParseSchedSwitchCompact(const uint8_t* start,
683                                         uint64_t timestamp,
684                                         const CompactSchedSwitchFormat* format,
685                                         CompactSchedBuffer* compact_buf,
686                                         FtraceMetadata* metadata) {
687   compact_buf->sched_switch().AppendTimestamp(timestamp);
688 
689   int32_t next_pid = ReadValue<int32_t>(start + format->next_pid_offset);
690   compact_buf->sched_switch().next_pid().Append(next_pid);
691   metadata->AddPid(next_pid);
692 
693   int32_t next_prio = ReadValue<int32_t>(start + format->next_prio_offset);
694   compact_buf->sched_switch().next_prio().Append(next_prio);
695 
696   // Varint encoding of int32 and int64 is the same, so treat the value as
697   // int64 after reading.
698   int64_t prev_state = ReadSignedFtraceValue(start + format->prev_state_offset,
699                                              format->prev_state_type);
700   compact_buf->sched_switch().prev_state().Append(prev_state);
701 
702   // next_comm
703   const char* comm_ptr =
704       reinterpret_cast<const char*>(start + format->next_comm_offset);
705   size_t iid = compact_buf->interner().InternComm(comm_ptr);
706   compact_buf->sched_switch().next_comm_index().Append(iid);
707 }
708 
709 // static
ParseSchedWakingCompact(const uint8_t * start,uint64_t timestamp,const CompactSchedWakingFormat * format,CompactSchedBuffer * compact_buf,FtraceMetadata * metadata)710 void CpuReader::ParseSchedWakingCompact(const uint8_t* start,
711                                         uint64_t timestamp,
712                                         const CompactSchedWakingFormat* format,
713                                         CompactSchedBuffer* compact_buf,
714                                         FtraceMetadata* metadata) {
715   compact_buf->sched_waking().AppendTimestamp(timestamp);
716 
717   int32_t pid = ReadValue<int32_t>(start + format->pid_offset);
718   compact_buf->sched_waking().pid().Append(pid);
719   metadata->AddPid(pid);
720 
721   int32_t target_cpu = ReadValue<int32_t>(start + format->target_cpu_offset);
722   compact_buf->sched_waking().target_cpu().Append(target_cpu);
723 
724   int32_t prio = ReadValue<int32_t>(start + format->prio_offset);
725   compact_buf->sched_waking().prio().Append(prio);
726 
727   // comm
728   const char* comm_ptr =
729       reinterpret_cast<const char*>(start + format->comm_offset);
730   size_t iid = compact_buf->interner().InternComm(comm_ptr);
731   compact_buf->sched_waking().comm_index().Append(iid);
732 }
733 
734 }  // namespace perfetto
735