1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <algorithm>
18 #include <vector>
19
20 #include <google/protobuf/compiler/importer.h>
21 #include <google/protobuf/dynamic_message.h>
22 #include <google/protobuf/io/zero_copy_stream_impl.h>
23
24 #include "perfetto/ext/base/file_utils.h"
25 #include "perfetto/ext/base/scoped_file.h"
26 #include "perfetto/protozero/field.h"
27 #include "perfetto/protozero/packed_repeated_fields.h"
28 #include "perfetto/protozero/proto_decoder.h"
29 #include "perfetto/protozero/proto_utils.h"
30 #include "perfetto/protozero/scattered_heap_buffer.h"
31
32 #include "protos/third_party/pprof/profile.pbzero.h"
33
34 namespace perfetto {
35 namespace protoprofile {
36 namespace {
37
38 using protozero::proto_utils::ProtoWireType;
39 using ::google::protobuf::Descriptor;
40 using ::google::protobuf::DynamicMessageFactory;
41 using ::google::protobuf::FieldDescriptor;
42 using ::google::protobuf::FileDescriptor;
43 using ::google::protobuf::Message;
44 using ::google::protobuf::compiler::DiskSourceTree;
45 using ::google::protobuf::compiler::Importer;
46 using ::google::protobuf::compiler::MultiFileErrorCollector;
47
48 class MultiFileErrorCollectorImpl : public MultiFileErrorCollector {
49 public:
50 ~MultiFileErrorCollectorImpl() override;
51 void AddError(const std::string& filename,
52 int line,
53 int column,
54 const std::string& message) override;
55
56 void AddWarning(const std::string& filename,
57 int line,
58 int column,
59 const std::string& message) override;
60 };
61
62 MultiFileErrorCollectorImpl::~MultiFileErrorCollectorImpl() = default;
63
AddError(const std::string & filename,int line,int column,const std::string & message)64 void MultiFileErrorCollectorImpl::AddError(const std::string& filename,
65 int line,
66 int column,
67 const std::string& message) {
68 PERFETTO_ELOG("Error %s %d:%d: %s", filename.c_str(), line, column,
69 message.c_str());
70 }
71
AddWarning(const std::string & filename,int line,int column,const std::string & message)72 void MultiFileErrorCollectorImpl::AddWarning(const std::string& filename,
73 int line,
74 int column,
75 const std::string& message) {
76 PERFETTO_ELOG("Error %s %d:%d: %s", filename.c_str(), line, column,
77 message.c_str());
78 }
79
80 class SizeProfileComputer {
81 public:
82 std::string Compute(const uint8_t* ptr,
83 size_t size,
84 const Descriptor* descriptor);
85
86 private:
87 struct StackInfo {
88 std::vector<size_t> samples;
89 std::vector<int> locations;
90 };
91
92 void ComputeInner(const uint8_t* ptr,
93 size_t size,
94 const Descriptor* descriptor);
95 void Sample(size_t size);
96 int InternString(const std::string& str);
97 int InternLocation(const std::string& str);
98 size_t GetFieldSize(const protozero::Field& f);
99
100 // Convert the current stack into a string:
101 // {"Foo", "#bar", "Bar", "#baz", "int"} -> "Foo$#bar$Bar$#baz$int$"
102 std::string StackToKey();
103
104 // The current 'stack' we're considering as we parse the protobuf.
105 // For example if we're currently looking at the varint field baz which is
106 // nested inside message Bar which is in turn a field named bar on the message
107 // Foo. Then the stack would be: Foo, #bar, Bar, #baz, int
108 // We keep track of both the field names (#bar, #baz) and the field types
109 // (Foo, Bar, int) as sometimes we are intrested in which fields are big
110 // and sometimes which types are big.
111 std::vector<std::string> stack_;
112
113 // Information about each stack seen. Keyed by a unique string for each stack.
114 std::map<std::string, StackInfo> stack_info_;
115
116 // Interned strings:
117 std::vector<std::string> strings_;
118 std::map<std::string, int> string_to_id_;
119
120 // Interned 'locations', each location is a single frame of the stack.
121 std::map<std::string, int> locations_;
122 };
123
GetFieldSize(const protozero::Field & f)124 size_t SizeProfileComputer::GetFieldSize(const protozero::Field& f) {
125 uint8_t buf[10];
126 switch (f.type()) {
127 case protozero::proto_utils::ProtoWireType::kVarInt:
128 return static_cast<size_t>(
129 protozero::proto_utils::WriteVarInt(f.as_uint64(), buf) - buf);
130 case protozero::proto_utils::ProtoWireType::kLengthDelimited:
131 return f.size();
132 case protozero::proto_utils::ProtoWireType::kFixed32:
133 return 4;
134 case protozero::proto_utils::ProtoWireType::kFixed64:
135 return 8;
136 }
137 PERFETTO_FATAL("unexpected field type"); // for gcc
138 }
139
InternString(const std::string & s)140 int SizeProfileComputer::InternString(const std::string& s) {
141 if (string_to_id_.count(s)) {
142 return string_to_id_[s];
143 }
144 strings_.push_back(s);
145 int id = static_cast<int>(strings_.size() - 1);
146 string_to_id_[s] = id;
147 return id;
148 }
149
InternLocation(const std::string & s)150 int SizeProfileComputer::InternLocation(const std::string& s) {
151 if (locations_.count(s)) {
152 return locations_[s];
153 }
154 int id = static_cast<int>(locations_.size()) + 1;
155 locations_[s] = id;
156 return id;
157 }
158
StackToKey()159 std::string SizeProfileComputer::StackToKey() {
160 std::string key;
161 for (const std::string& part : stack_) {
162 key += part;
163 key += "$";
164 }
165 return key;
166 }
167
Sample(size_t size)168 void SizeProfileComputer::Sample(size_t size) {
169 const std::string& key = StackToKey();
170
171 if (!stack_info_.count(key)) {
172 StackInfo& info = stack_info_[key];
173 info.locations.resize(stack_.size());
174 for (size_t i = 0; i < stack_.size(); i++) {
175 info.locations[i] = InternLocation(stack_[i]);
176 }
177 }
178
179 stack_info_[key].samples.push_back(size);
180 }
181
Compute(const uint8_t * ptr,size_t size,const Descriptor * descriptor)182 std::string SizeProfileComputer::Compute(const uint8_t* ptr,
183 size_t size,
184 const Descriptor* descriptor) {
185 PERFETTO_CHECK(InternString("") == 0);
186 ComputeInner(ptr, size, descriptor);
187 protozero::HeapBuffered<third_party::perftools::profiles::pbzero::Profile>
188 profile;
189
190 auto* sample_type = profile->add_sample_type();
191 sample_type->set_type(InternString("protos"));
192 sample_type->set_unit(InternString("count"));
193
194 sample_type = profile->add_sample_type();
195 sample_type->set_type(InternString("max_size"));
196 sample_type->set_unit(InternString("bytes"));
197
198 sample_type = profile->add_sample_type();
199 sample_type->set_type(InternString("min_size"));
200 sample_type->set_unit(InternString("bytes"));
201
202 sample_type = profile->add_sample_type();
203 sample_type->set_type(InternString("median"));
204 sample_type->set_unit(InternString("bytes"));
205
206 sample_type = profile->add_sample_type();
207 sample_type->set_type(InternString("total_size"));
208 sample_type->set_unit(InternString("bytes"));
209
210 // For each unique stack we've seen write out the stats:
211 for (auto& id_info : stack_info_) {
212 StackInfo& info = id_info.second;
213
214 protozero::PackedVarInt location_ids;
215 auto* sample = profile->add_sample();
216 for (auto it = info.locations.rbegin(); it != info.locations.rend(); ++it) {
217 location_ids.Append(static_cast<uint64_t>(*it));
218 }
219 sample->set_location_id(location_ids);
220
221 std::sort(info.samples.begin(), info.samples.end());
222 size_t count = info.samples.size();
223 size_t total_size = 0;
224 size_t max_size = info.samples[count - 1];
225 size_t min_size = info.samples[0];
226 size_t median_size = info.samples[count / 2];
227 for (size_t i = 0; i < count; ++i)
228 total_size += info.samples[i];
229 // These have to be in the same order as the sample types above:
230 protozero::PackedVarInt values;
231 values.Append(static_cast<int64_t>(count));
232 values.Append(static_cast<int64_t>(max_size));
233 values.Append(static_cast<int64_t>(min_size));
234 values.Append(static_cast<int64_t>(median_size));
235 values.Append(static_cast<int64_t>(total_size));
236 sample->set_value(values);
237 }
238
239 // The proto profile has a two step mapping where samples are associated with
240 // locations which in turn are associated to functions. We don't currently
241 // distinguish them so we make a 1:1 mapping between the locations and the
242 // functions:
243 for (const auto& location_id : locations_) {
244 auto* location = profile->add_location();
245 location->set_id(static_cast<uint64_t>(location_id.second));
246 auto* line = location->add_line();
247 line->set_function_id(static_cast<uint64_t>(location_id.second));
248 }
249
250 for (const auto& location_id : locations_) {
251 auto* function = profile->add_function();
252 function->set_id(static_cast<uint64_t>(location_id.second));
253 function->set_name(InternString(location_id.first));
254 }
255
256 // Finally the string table. We intern more strings above, so this has to be
257 // last.
258 for (int i = 0; i < static_cast<int>(strings_.size()); i++) {
259 profile->add_string_table(strings_[static_cast<size_t>(i)]);
260 }
261 return profile.SerializeAsString();
262 }
263
ComputeInner(const uint8_t * ptr,size_t size,const Descriptor * descriptor)264 void SizeProfileComputer::ComputeInner(const uint8_t* ptr,
265 size_t size,
266 const Descriptor* descriptor) {
267 size_t overhead = size;
268 size_t unknown = 0;
269 protozero::ProtoDecoder decoder(ptr, size);
270
271 stack_.push_back(descriptor->name());
272
273 // Compute the size of each sub-field of this message, subtracting it
274 // from overhead and possible adding it to unknown.
275 for (;;) {
276 if (decoder.bytes_left() == 0)
277 break;
278 protozero::Field field = decoder.ReadField();
279 if (!field.valid()) {
280 PERFETTO_ELOG("Field not valid (can mean field id >1000)");
281 break;
282 }
283
284 int id = field.id();
285 ProtoWireType type = field.type();
286 size_t field_size = GetFieldSize(field);
287
288 overhead -= field_size;
289 const FieldDescriptor* field_descriptor = descriptor->FindFieldByNumber(id);
290 if (!field_descriptor) {
291 unknown += field_size;
292 continue;
293 }
294
295 stack_.push_back("#" + field_descriptor->name());
296 bool is_message_type =
297 field_descriptor->type() == FieldDescriptor::TYPE_MESSAGE;
298 if (type == ProtoWireType::kLengthDelimited && is_message_type) {
299 ComputeInner(field.data(), field.size(),
300 field_descriptor->message_type());
301 } else {
302 stack_.push_back(field_descriptor->type_name());
303 Sample(field_size);
304 stack_.pop_back();
305 }
306 stack_.pop_back();
307 }
308
309 if (unknown) {
310 stack_.push_back("#:unknown:");
311 Sample(unknown);
312 stack_.pop_back();
313 }
314
315 // Anything not blamed on a child is overhead for this message.
316 Sample(overhead);
317 stack_.pop_back();
318 }
319
// Prints the command line synopsis to stderr, using argv[0] as the program
// name. Always returns 1 so that callers can return the result directly as
// the process exit code.
int PrintUsage(int, const char** argv) {
  const char* const program_name = argv[0];
  fprintf(stderr, "Usage: %s INPUT_PATH OUTPUT_PATH\n", program_name);
  return 1;
}
324
Main(int argc,const char ** argv)325 int Main(int argc, const char** argv) {
326 if (argc != 3)
327 return PrintUsage(argc, argv);
328
329 const char* input_path = argv[1];
330 const char* output_path = argv[2];
331
332 base::ScopedFile proto_fd = base::OpenFile(input_path, O_RDONLY);
333 if (!proto_fd) {
334 PERFETTO_ELOG("Could not open input path (%s)", input_path);
335 return 1;
336 }
337
338 std::string s;
339 base::ReadFileDescriptor(proto_fd.get(), &s);
340
341 const Descriptor* descriptor;
342 DiskSourceTree dst;
343 dst.MapPath("", "");
344 MultiFileErrorCollectorImpl mfe;
345 Importer importer(&dst, &mfe);
346 const FileDescriptor* parsed_file =
347 importer.Import("protos/perfetto/trace/trace.proto");
348 DynamicMessageFactory dmf;
349 descriptor = parsed_file->message_type(0);
350
351 const uint8_t* start = reinterpret_cast<const uint8_t*>(s.data());
352 size_t size = s.size();
353
354 if (!descriptor) {
355 PERFETTO_ELOG("Could not parse trace.proto");
356 return 1;
357 }
358
359 base::ScopedFile output_fd =
360 base::OpenFile(output_path, O_WRONLY | O_TRUNC | O_CREAT, 0600);
361 if (!output_fd) {
362 PERFETTO_ELOG("Could not open output path (%s)", output_path);
363 return 1;
364 }
365 SizeProfileComputer computer;
366 std::string out = computer.Compute(start, size, descriptor);
367 base::WriteAll(output_fd.get(), out.data(), out.size());
368 base::FlushFile(output_fd.get());
369
370 return 0;
371 }
372
373 } // namespace
374 } // namespace protoprofile
375 } // namespace perfetto
376
main(int argc,const char ** argv)377 int main(int argc, const char** argv) {
378 return perfetto::protoprofile::Main(argc, argv);
379 }
380