1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <algorithm>
18 #include <vector>
19 
20 #include <google/protobuf/compiler/importer.h>
21 #include <google/protobuf/dynamic_message.h>
22 #include <google/protobuf/io/zero_copy_stream_impl.h>
23 
24 #include "perfetto/ext/base/file_utils.h"
25 #include "perfetto/ext/base/scoped_file.h"
26 #include "perfetto/protozero/field.h"
27 #include "perfetto/protozero/packed_repeated_fields.h"
28 #include "perfetto/protozero/proto_decoder.h"
29 #include "perfetto/protozero/proto_utils.h"
30 #include "perfetto/protozero/scattered_heap_buffer.h"
31 
32 #include "protos/third_party/pprof/profile.pbzero.h"
33 
34 namespace perfetto {
35 namespace protoprofile {
36 namespace {
37 
38 using protozero::proto_utils::ProtoWireType;
39 using ::google::protobuf::Descriptor;
40 using ::google::protobuf::DynamicMessageFactory;
41 using ::google::protobuf::FieldDescriptor;
42 using ::google::protobuf::FileDescriptor;
43 using ::google::protobuf::Message;
44 using ::google::protobuf::compiler::DiskSourceTree;
45 using ::google::protobuf::compiler::Importer;
46 using ::google::protobuf::compiler::MultiFileErrorCollector;
47 
// MultiFileErrorCollector implementation that forwards every diagnostic it
// receives to the perfetto log. An instance is handed to the protobuf Importer
// in Main() so that problems found while importing .proto files are reported.
class MultiFileErrorCollectorImpl : public MultiFileErrorCollector {
 public:
  // Declared here and defaulted out-of-line right after the class.
  ~MultiFileErrorCollectorImpl() override;

  // Logs |message| together with the position (|filename|, |line|, |column|)
  // it refers to.
  void AddError(const std::string& filename,
                int line,
                int column,
                const std::string& message) override;

  // Same reporting as AddError(), for diagnostics delivered through the
  // warning callback.
  void AddWarning(const std::string& filename,
                  int line,
                  int column,
                  const std::string& message) override;
};

MultiFileErrorCollectorImpl::~MultiFileErrorCollectorImpl() = default;
63 
AddError(const std::string & filename,int line,int column,const std::string & message)64 void MultiFileErrorCollectorImpl::AddError(const std::string& filename,
65                                            int line,
66                                            int column,
67                                            const std::string& message) {
68   PERFETTO_ELOG("Error %s %d:%d: %s", filename.c_str(), line, column,
69                 message.c_str());
70 }
71 
AddWarning(const std::string & filename,int line,int column,const std::string & message)72 void MultiFileErrorCollectorImpl::AddWarning(const std::string& filename,
73                                              int line,
74                                              int column,
75                                              const std::string& message) {
76   PERFETTO_ELOG("Error %s %d:%d: %s", filename.c_str(), line, column,
77                 message.c_str());
78 }
79 
// Computes where the bytes of a serialized proto message go. The message is
// walked field by field (recursing into sub-messages) and the size of every
// field is recorded against the 'stack' of message types and field names that
// leads to it. The result is emitted as a serialized pprof Profile.
class SizeProfileComputer {
 public:
  // Profiles the |size| bytes starting at |ptr|, interpreting them as a
  // message of type |descriptor|. Returns the serialized pprof Profile.
  std::string Compute(const uint8_t* ptr,
                      size_t size,
                      const Descriptor* descriptor);

 private:
  // Everything recorded for one unique stack.
  struct StackInfo {
    // One entry per Sample() call made while this stack was current.
    std::vector<size_t> samples;
    // Interned location id of each frame of the stack, in the same order as
    // the frames appear in |stack_| (outermost first).
    std::vector<int> locations;
  };

  // Recursive worker behind Compute(): samples the fields of one message and
  // descends into length-delimited fields whose descriptor type is a message.
  void ComputeInner(const uint8_t* ptr,
                    size_t size,
                    const Descriptor* descriptor);
  // Records |size| against the current contents of |stack_|.
  void Sample(size_t size);
  // Returns the index of |str| in |strings_|, appending it if it is new.
  int InternString(const std::string& str);
  // Returns the id of the frame |str|, assigning the next id (starting from 1)
  // if it is new.
  int InternLocation(const std::string& str);
  // Returns the number of bytes attributed to the value of |f|: the encoded
  // length for varints, f.size() for length-delimited fields and 4 or 8 for
  // fixed-width fields.
  size_t GetFieldSize(const protozero::Field& f);

  // Convert the current stack into a string:
  // {"Foo", "#bar", "Bar", "#baz", "int"} -> "Foo$#bar$Bar$#baz$int$"
  std::string StackToKey();

  // The current 'stack' we're considering as we parse the protobuf.
  // For example if we're currently looking at the varint field baz which is
  // nested inside message Bar which is in turn a field named bar on the message
  // Foo. Then the stack would be: Foo, #bar, Bar, #baz, int
  // We keep track of both the field names (#bar, #baz) and the field types
  // (Foo, Bar, int) as sometimes we are interested in which fields are big
  // and sometimes which types are big.
  std::vector<std::string> stack_;

  // Information about each stack seen. Keyed by a unique string for each stack
  // (see StackToKey()).
  std::map<std::string, StackInfo> stack_info_;

  // Interned strings: |strings_| holds them in id order, |string_to_id_| maps
  // each string back to its index in |strings_|.
  std::vector<std::string> strings_;
  std::map<std::string, int> string_to_id_;

  // Interned 'locations', each location is a single frame of the stack. Maps
  // the frame text to its id.
  std::map<std::string, int> locations_;
};
123 
GetFieldSize(const protozero::Field & f)124 size_t SizeProfileComputer::GetFieldSize(const protozero::Field& f) {
125   uint8_t buf[10];
126   switch (f.type()) {
127     case protozero::proto_utils::ProtoWireType::kVarInt:
128       return static_cast<size_t>(
129           protozero::proto_utils::WriteVarInt(f.as_uint64(), buf) - buf);
130     case protozero::proto_utils::ProtoWireType::kLengthDelimited:
131       return f.size();
132     case protozero::proto_utils::ProtoWireType::kFixed32:
133       return 4;
134     case protozero::proto_utils::ProtoWireType::kFixed64:
135       return 8;
136   }
137   PERFETTO_FATAL("unexpected field type");  // for gcc
138 }
139 
InternString(const std::string & s)140 int SizeProfileComputer::InternString(const std::string& s) {
141   if (string_to_id_.count(s)) {
142     return string_to_id_[s];
143   }
144   strings_.push_back(s);
145   int id = static_cast<int>(strings_.size() - 1);
146   string_to_id_[s] = id;
147   return id;
148 }
149 
InternLocation(const std::string & s)150 int SizeProfileComputer::InternLocation(const std::string& s) {
151   if (locations_.count(s)) {
152     return locations_[s];
153   }
154   int id = static_cast<int>(locations_.size()) + 1;
155   locations_[s] = id;
156   return id;
157 }
158 
StackToKey()159 std::string SizeProfileComputer::StackToKey() {
160   std::string key;
161   for (const std::string& part : stack_) {
162     key += part;
163     key += "$";
164   }
165   return key;
166 }
167 
Sample(size_t size)168 void SizeProfileComputer::Sample(size_t size) {
169   const std::string& key = StackToKey();
170 
171   if (!stack_info_.count(key)) {
172     StackInfo& info = stack_info_[key];
173     info.locations.resize(stack_.size());
174     for (size_t i = 0; i < stack_.size(); i++) {
175       info.locations[i] = InternLocation(stack_[i]);
176     }
177   }
178 
179   stack_info_[key].samples.push_back(size);
180 }
181 
Compute(const uint8_t * ptr,size_t size,const Descriptor * descriptor)182 std::string SizeProfileComputer::Compute(const uint8_t* ptr,
183                                          size_t size,
184                                          const Descriptor* descriptor) {
185   PERFETTO_CHECK(InternString("") == 0);
186   ComputeInner(ptr, size, descriptor);
187   protozero::HeapBuffered<third_party::perftools::profiles::pbzero::Profile>
188       profile;
189 
190   auto* sample_type = profile->add_sample_type();
191   sample_type->set_type(InternString("protos"));
192   sample_type->set_unit(InternString("count"));
193 
194   sample_type = profile->add_sample_type();
195   sample_type->set_type(InternString("max_size"));
196   sample_type->set_unit(InternString("bytes"));
197 
198   sample_type = profile->add_sample_type();
199   sample_type->set_type(InternString("min_size"));
200   sample_type->set_unit(InternString("bytes"));
201 
202   sample_type = profile->add_sample_type();
203   sample_type->set_type(InternString("median"));
204   sample_type->set_unit(InternString("bytes"));
205 
206   sample_type = profile->add_sample_type();
207   sample_type->set_type(InternString("total_size"));
208   sample_type->set_unit(InternString("bytes"));
209 
210   // For each unique stack we've seen write out the stats:
211   for (auto& id_info : stack_info_) {
212     StackInfo& info = id_info.second;
213 
214     protozero::PackedVarInt location_ids;
215     auto* sample = profile->add_sample();
216     for (auto it = info.locations.rbegin(); it != info.locations.rend(); ++it) {
217       location_ids.Append(static_cast<uint64_t>(*it));
218     }
219     sample->set_location_id(location_ids);
220 
221     std::sort(info.samples.begin(), info.samples.end());
222     size_t count = info.samples.size();
223     size_t total_size = 0;
224     size_t max_size = info.samples[count - 1];
225     size_t min_size = info.samples[0];
226     size_t median_size = info.samples[count / 2];
227     for (size_t i = 0; i < count; ++i)
228       total_size += info.samples[i];
229     // These have to be in the same order as the sample types above:
230     protozero::PackedVarInt values;
231     values.Append(static_cast<int64_t>(count));
232     values.Append(static_cast<int64_t>(max_size));
233     values.Append(static_cast<int64_t>(min_size));
234     values.Append(static_cast<int64_t>(median_size));
235     values.Append(static_cast<int64_t>(total_size));
236     sample->set_value(values);
237   }
238 
239   // The proto profile has a two step mapping where samples are associated with
240   // locations which in turn are associated to functions. We don't currently
241   // distinguish them so we make a 1:1 mapping between the locations and the
242   // functions:
243   for (const auto& location_id : locations_) {
244     auto* location = profile->add_location();
245     location->set_id(static_cast<uint64_t>(location_id.second));
246     auto* line = location->add_line();
247     line->set_function_id(static_cast<uint64_t>(location_id.second));
248   }
249 
250   for (const auto& location_id : locations_) {
251     auto* function = profile->add_function();
252     function->set_id(static_cast<uint64_t>(location_id.second));
253     function->set_name(InternString(location_id.first));
254   }
255 
256   // Finally the string table. We intern more strings above, so this has to be
257   // last.
258   for (int i = 0; i < static_cast<int>(strings_.size()); i++) {
259     profile->add_string_table(strings_[static_cast<size_t>(i)]);
260   }
261   return profile.SerializeAsString();
262 }
263 
ComputeInner(const uint8_t * ptr,size_t size,const Descriptor * descriptor)264 void SizeProfileComputer::ComputeInner(const uint8_t* ptr,
265                                        size_t size,
266                                        const Descriptor* descriptor) {
267   size_t overhead = size;
268   size_t unknown = 0;
269   protozero::ProtoDecoder decoder(ptr, size);
270 
271   stack_.push_back(descriptor->name());
272 
273   // Compute the size of each sub-field of this message, subtracting it
274   // from overhead and possible adding it to unknown.
275   for (;;) {
276     if (decoder.bytes_left() == 0)
277       break;
278     protozero::Field field = decoder.ReadField();
279     if (!field.valid()) {
280       PERFETTO_ELOG("Field not valid (can mean field id >1000)");
281       break;
282     }
283 
284     int id = field.id();
285     ProtoWireType type = field.type();
286     size_t field_size = GetFieldSize(field);
287 
288     overhead -= field_size;
289     const FieldDescriptor* field_descriptor = descriptor->FindFieldByNumber(id);
290     if (!field_descriptor) {
291       unknown += field_size;
292       continue;
293     }
294 
295     stack_.push_back("#" + field_descriptor->name());
296     bool is_message_type =
297         field_descriptor->type() == FieldDescriptor::TYPE_MESSAGE;
298     if (type == ProtoWireType::kLengthDelimited && is_message_type) {
299       ComputeInner(field.data(), field.size(),
300                    field_descriptor->message_type());
301     } else {
302       stack_.push_back(field_descriptor->type_name());
303       Sample(field_size);
304       stack_.pop_back();
305     }
306     stack_.pop_back();
307   }
308 
309   if (unknown) {
310     stack_.push_back("#:unknown:");
311     Sample(unknown);
312     stack_.pop_back();
313   }
314 
315   // Anything not blamed on a child is overhead for this message.
316   Sample(overhead);
317   stack_.pop_back();
318 }
319 
// Prints the command line synopsis to stderr, using argv[0] as the program
// name. Always returns 1 so that callers can return the result directly as the
// exit code.
int PrintUsage(int, const char** argv) {
  const char* program_name = argv[0];
  fprintf(stderr, "Usage: %s INPUT_PATH OUTPUT_PATH\n", program_name);
  return 1;
}
324 
Main(int argc,const char ** argv)325 int Main(int argc, const char** argv) {
326   if (argc != 3)
327     return PrintUsage(argc, argv);
328 
329   const char* input_path = argv[1];
330   const char* output_path = argv[2];
331 
332   base::ScopedFile proto_fd = base::OpenFile(input_path, O_RDONLY);
333   if (!proto_fd) {
334     PERFETTO_ELOG("Could not open input path (%s)", input_path);
335     return 1;
336   }
337 
338   std::string s;
339   base::ReadFileDescriptor(proto_fd.get(), &s);
340 
341   const Descriptor* descriptor;
342   DiskSourceTree dst;
343   dst.MapPath("", "");
344   MultiFileErrorCollectorImpl mfe;
345   Importer importer(&dst, &mfe);
346   const FileDescriptor* parsed_file =
347       importer.Import("protos/perfetto/trace/trace.proto");
348   DynamicMessageFactory dmf;
349   descriptor = parsed_file->message_type(0);
350 
351   const uint8_t* start = reinterpret_cast<const uint8_t*>(s.data());
352   size_t size = s.size();
353 
354   if (!descriptor) {
355     PERFETTO_ELOG("Could not parse trace.proto");
356     return 1;
357   }
358 
359   base::ScopedFile output_fd =
360       base::OpenFile(output_path, O_WRONLY | O_TRUNC | O_CREAT, 0600);
361   if (!output_fd) {
362     PERFETTO_ELOG("Could not open output path (%s)", output_path);
363     return 1;
364   }
365   SizeProfileComputer computer;
366   std::string out = computer.Compute(start, size, descriptor);
367   base::WriteAll(output_fd.get(), out.data(), out.size());
368   base::FlushFile(output_fd.get());
369 
370   return 0;
371 }
372 
373 }  // namespace
374 }  // namespace protoprofile
375 }  // namespace perfetto
376 
main(int argc,const char ** argv)377 int main(int argc, const char** argv) {
378   return perfetto::protoprofile::Main(argc, argv);
379 }
380