1 // Copyright 2016 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // This file contains APIs for use within Bloaty. None of these APIs have any
16 // guarantees whatsoever about their stability! The public API for bloaty is
17 // its command-line interface.
18
19 #ifndef BLOATY_H_
20 #define BLOATY_H_
21
// These must be defined before the first inclusion of <stdint.h> /
// <inttypes.h>, including any transitive inclusion.
#define __STDC_LIMIT_MACROS
#define __STDC_FORMAT_MACROS
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <map>
#include <memory>
#include <ostream>
#include <set>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>

#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "capstone/capstone.h"
#include "re2/re2.h"

#include "bloaty.pb.h"
#include "range_map.h"
41
// Place inside a class definition to make |class_name| non-copyable: both the
// copy constructor and the copy-assignment operator are declared as deleted.
#define BLOATY_DISALLOW_COPY_AND_ASSIGN(class_name) \
  class_name(const class_name&) = delete;           \
  void operator=(const class_name&) = delete;
45
// Marks a code path that must never execute.  The assert fires when
// assertions are enabled; afterwards the compiler is told that control cannot
// reach this point.
#define BLOATY_UNREACHABLE() \
  do {                       \
    assert(false);           \
    __builtin_unreachable(); \
  } while (0)
50
// BLOATY_ASSERT(expr) is assert(expr) when assertions are enabled.  When
// NDEBUG is defined the expression is never evaluated, but it still appears
// in (dead) code, which prevents "unused variable" warnings for values that
// are referenced only from assertions.
#ifndef NDEBUG
#define BLOATY_ASSERT(expr) assert(expr)
#else
#define BLOATY_ASSERT(expr) do {} while (false && (expr))
#endif
57
58 namespace bloaty {
59
60 extern int verbose_level;
61
62 class NameMunger;
63 class Options;
64 struct DualMap;
65 struct DisassemblyInfo;
66
// The kinds of data source that a RangeSink can be associated with.
// Enumerator order is significant: the numeric values follow from it.
enum class DataSource {
  kArchiveMembers,
  kCompileUnits,
  kInlines,
  kInputFiles,
  kRawRanges,
  kSections,
  kSegments,

  // Generic "symbols" source.  It is always replaced by one of the concrete
  // symbol types below before being set on a sink.
  kSymbols,

  // Concrete symbol types.
  kRawSymbols,
  kFullSymbols,
  kShortSymbols,
};
84
// Exception type that carries, in addition to the runtime_error message, the
// file name and line number supplied at construction.
class Error : public std::runtime_error {
 public:
  // |msg| becomes the what() string.  |file| is stored as a raw pointer (the
  // string is not copied), so it must remain valid for as long as file() may
  // be called.
  Error(const char* msg, const char* file, int line)
      : std::runtime_error(msg), file_(file), line_(line) {}

  // TODO(haberman): add these to Bloaty's error message when verbose is
  // enabled.
  int line() const { return line_; }
  const char* file() const { return file_; }

 private:
  const char* file_;
  int line_;
};
99
100 class InputFile {
101 public:
InputFile(const std::string & filename)102 InputFile(const std::string& filename) : filename_(filename) {}
~InputFile()103 virtual ~InputFile() {}
104
filename()105 const std::string& filename() const { return filename_; }
data()106 absl::string_view data() const { return data_; }
107
108 private:
109 BLOATY_DISALLOW_COPY_AND_ASSIGN(InputFile);
110 const std::string filename_;
111
112 protected:
113 absl::string_view data_;
114 };
115
116 class InputFileFactory {
117 public:
~InputFileFactory()118 virtual ~InputFileFactory() {}
119
120 // Throws if the file could not be opened.
121 virtual std::unique_ptr<InputFile> OpenFile(
122 const std::string& filename) const = 0;
123 };
124
125 class MmapInputFileFactory : public InputFileFactory {
126 public:
127 std::unique_ptr<InputFile> OpenFile(
128 const std::string& filename) const override;
129 };
130
131 // NOTE: all sizes are uint64, even on 32-bit platforms:
132 // - 32-bit platforms can have files >4GB in some cases.
133 // - for object files (not executables/shared libs) we pack both a section
134 // index and an address into the "vmaddr" value, and we need enough bits to
135 // safely do this.
136
137 // A RangeSink allows data sources to assign labels to ranges of VM address
138 // space and/or file offsets.
139 class RangeSink {
140 public:
141 RangeSink(const InputFile* file, const Options& options,
142 DataSource data_source, const DualMap* translator);
143 ~RangeSink();
144
options()145 const Options& options() const { return options_; }
146
147 void AddOutput(DualMap* map, const NameMunger* munger);
148
data_source()149 DataSource data_source() const { return data_source_; }
input_file()150 const InputFile& input_file() const { return *file_; }
IsBaseMap()151 bool IsBaseMap() const { return translator_ == nullptr; }
152
153 // If vmsize or filesize is zero, this mapping is presumed not to exist in
154 // that domain. For example, .bss mappings don't exist in the file, and
155 // .debug_* mappings don't exist in memory.
156 void AddRange(const char* analyzer, absl::string_view name, uint64_t vmaddr,
157 uint64_t vmsize, uint64_t fileoff, uint64_t filesize);
158
AddRange(const char * analyzer,absl::string_view name,uint64_t vmaddr,uint64_t vmsize,absl::string_view file_range)159 void AddRange(const char* analyzer, absl::string_view name, uint64_t vmaddr,
160 uint64_t vmsize, absl::string_view file_range) {
161 AddRange(analyzer, name, vmaddr, vmsize,
162 file_range.data() - file_->data().data(), file_range.size());
163 }
164
165 void AddFileRange(const char* analyzer, absl::string_view name,
166 uint64_t fileoff, uint64_t filesize);
167
168 // Like AddFileRange(), but the label is whatever label was previously
169 // assigned to VM address |label_from_vmaddr|. If no existing label is
170 // assigned to |label_from_vmaddr|, this function does nothing.
171 void AddFileRangeForVMAddr(const char* analyzer, uint64_t label_from_vmaddr,
172 absl::string_view file_range);
173 void AddVMRangeForVMAddr(const char* analyzer, uint64_t label_from_vmaddr,
174 uint64_t addr, uint64_t size);
175
176 // Applies this label from |from_file_range| to |file_range|, but only if the
177 // entire |from_file_range| has a single label. If not, this does nothing.
178 void AddFileRangeForFileRange(const char* analyzer,
179 absl::string_view from_file_range,
180 absl::string_view file_range);
181
AddFileRange(const char * analyzer,absl::string_view name,absl::string_view file_range)182 void AddFileRange(const char* analyzer, absl::string_view name,
183 absl::string_view file_range) {
184 // When separate debug files are being used, the DWARF analyzer will try to
185 // add sections of the debug file. We want to prevent this because we only
186 // want to profile the main file (not the debug file), so we filter these
187 // out. This approach is simple to implement, but does result in some
188 // useless work being done. We may want to avoid doing this useless work in
189 // the first place.
190 if (FileContainsPointer(file_range.data())) {
191 AddFileRange(analyzer, name, file_range.data() - file_->data().data(),
192 file_range.size());
193 }
194 }
195
196 // The VM-only functions below may not be used to populate the base map!
197
198 // Adds a region to the memory map. It should not overlap any previous
199 // region added with Add(), but it should overlap the base memory map.
200 void AddVMRange(const char* analyzer, uint64_t vmaddr, uint64_t vmsize,
201 const std::string& name);
202
203 // Like Add(), but allows that this addr/size might have previously been added
204 // already under a different name. If so, this name becomes an alias of the
205 // previous name.
206 //
207 // This is for things like symbol tables that sometimes map multiple names to
208 // the same physical function.
209 void AddVMRangeAllowAlias(const char* analyzer, uint64_t vmaddr,
210 uint64_t size, const std::string& name);
211
212 // Like Add(), but allows that this addr/size might have previously been added
213 // already under a different name. If so, this add is simply ignored.
214 //
215 // This is for cases like sourcefiles. Sometimes a single function appears to
216 // come from multiple source files. But if it does, we don't want to alias
217 // the entire source file to another, because it's probably only part of the
218 // source file that overlaps.
219 void AddVMRangeIgnoreDuplicate(const char* analyzer, uint64_t vmaddr,
220 uint64_t size, const std::string& name);
221
MapAtIndex(size_t index)222 const DualMap& MapAtIndex(size_t index) const {
223 return *outputs_[index].first;
224 }
225
226 // Translates the given pointer (which must be within the range of
227 // input_file().data()) to a VM address.
228 uint64_t TranslateFileToVM(const char* ptr);
229 absl::string_view TranslateVMToFile(uint64_t address);
230
231 static constexpr uint64_t kUnknownSize = RangeMap::kUnknownSize;
232
233 private:
234 BLOATY_DISALLOW_COPY_AND_ASSIGN(RangeSink);
235
FileContainsPointer(const void * ptr)236 bool FileContainsPointer(const void* ptr) const {
237 absl::string_view file_data = file_->data();
238 return ptr >= file_data.data() && ptr < file_data.data() + file_data.size();
239 }
240
241 bool ContainsVerboseVMAddr(uint64_t vmaddr, uint64_t vmsize);
242 bool ContainsVerboseFileOffset(uint64_t fileoff, uint64_t filesize);
243 bool IsVerboseForVMRange(uint64_t vmaddr, uint64_t vmsize);
244 bool IsVerboseForFileRange(uint64_t fileoff, uint64_t filesize);
245
246 const InputFile* file_;
247 const Options options_;
248 DataSource data_source_;
249 const DualMap* translator_;
250 std::vector<std::pair<DualMap*, const NameMunger*>> outputs_;
251 };
252
253
254 // NameMunger //////////////////////////////////////////////////////////////////
255
256 // Use to transform input names according to the user's configuration.
257 // For example, the user can use regexes.
258 class NameMunger {
259 public:
NameMunger()260 NameMunger() {}
261
262 // Adds a regex that will be applied to all names. All regexes will be
263 // applied in sequence.
264 void AddRegex(const std::string& regex, const std::string& replacement);
265
266 std::string Munge(absl::string_view name) const;
267
IsEmpty()268 bool IsEmpty() const { return regexes_.empty(); }
269
270 private:
271 BLOATY_DISALLOW_COPY_AND_ASSIGN(NameMunger);
272 std::vector<std::pair<std::unique_ptr<RE2>, std::string>> regexes_;
273 };
274
275 typedef std::map<absl::string_view, std::pair<uint64_t, uint64_t>> SymbolTable;
276
277 // Represents an object/executable file in a format like ELF, Mach-O, PE, etc.
278 // To support a new file type, implement this interface.
279 class ObjectFile {
280 public:
ObjectFile(std::unique_ptr<InputFile> file_data)281 ObjectFile(std::unique_ptr<InputFile> file_data)
282 : file_data_(std::move(file_data)), debug_file_(this) {}
~ObjectFile()283 virtual ~ObjectFile() {}
284
285 virtual std::string GetBuildId() const = 0;
286
287 // Process this file, pushing data to |sinks| as appropriate for each data
288 // source. If any debug files match the build id for this file, it will be
289 // given here, otherwise it is |this|.
290 virtual void ProcessFile(const std::vector<RangeSink*>& sinks) const = 0;
291
292 virtual bool GetDisassemblyInfo(absl::string_view symbol,
293 DataSource symbol_source,
294 DisassemblyInfo* info) const = 0;
295
file_data()296 const InputFile& file_data() const { return *file_data_; }
297
298 // Sets the debug file for |this|. |file| must outlive this instance.
set_debug_file(const ObjectFile * file)299 void set_debug_file(const ObjectFile* file) {
300 assert(debug_file_->GetBuildId() == GetBuildId());
301 debug_file_ = file;
302 }
303
debug_file()304 const ObjectFile& debug_file() const { return *debug_file_; }
305
306 private:
307 std::unique_ptr<InputFile> file_data_;
308 const ObjectFile* debug_file_;
309 };
310
311 std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file);
312 std::unique_ptr<ObjectFile> TryOpenMachOFile(std::unique_ptr<InputFile>& file);
313 std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(std::unique_ptr<InputFile>& file);
314
315 namespace dwarf {
316
317 struct File {
318 absl::string_view debug_info;
319 absl::string_view debug_types;
320 absl::string_view debug_str;
321 absl::string_view debug_abbrev;
322 absl::string_view debug_aranges;
323 absl::string_view debug_line;
324 absl::string_view debug_loc;
325 absl::string_view debug_pubnames;
326 absl::string_view debug_pubtypes;
327 absl::string_view debug_ranges;
328 };
329
330 } // namespace dwarf
331
332 // Provided by dwarf.cc. To use these, a module should fill in a dwarf::File
333 // and then call these functions.
334 void ReadDWARFCompileUnits(const dwarf::File& file, const SymbolTable& symtab,
335 const DualMap& map, RangeSink* sink);
336 void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink,
337 bool include_line);
338 void ReadEhFrame(absl::string_view contents, RangeSink* sink);
339 void ReadEhFrameHdr(absl::string_view contents, RangeSink* sink);
340
341
342 // LineReader //////////////////////////////////////////////////////////////////
343
344 // Provides range-based for to iterate over lines in a pipe.
345 //
346 // for ( auto& line : ReadLinesFromPipe("ls -l") ) {
347 // }
348
349 class LineIterator;
350
351 class LineReader {
352 public:
LineReader(FILE * file,bool pclose)353 LineReader(FILE* file, bool pclose) : file_(file), pclose_(pclose) {}
354 LineReader(LineReader&& other);
355
~LineReader()356 ~LineReader() { Close(); }
357
358 LineIterator begin();
359 LineIterator end();
360
361 void Next();
362
line()363 const std::string& line() const { return line_; }
eof()364 bool eof() { return eof_; }
365
366 private:
367 BLOATY_DISALLOW_COPY_AND_ASSIGN(LineReader);
368
369 void Close();
370
371 FILE* file_;
372 std::string line_;
373 bool eof_ = false;
374 bool pclose_;
375 };
376
377 class LineIterator {
378 public:
LineIterator(LineReader * reader)379 LineIterator(LineReader* reader) : reader_(reader) {}
380
381 bool operator!=(const LineIterator& /*other*/) const {
382 // Hack for range-based for.
383 return !reader_->eof();
384 }
385
386 void operator++() { reader_->Next(); }
387
388 const std::string& operator*() const {
389 return reader_->line();
390 }
391
392 private:
393 LineReader* reader_;
394 };
395
396 LineReader ReadLinesFromPipe(const std::string& cmd);
397
398 // Demangle C++ symbols according to the Itanium ABI. The |source| argument
399 // controls what demangling mode we are using.
400 std::string ItaniumDemangle(absl::string_view symbol, DataSource source);
401
402
403 // DualMap /////////////////////////////////////////////////////////////////////
404
405 // Contains a RangeMap for VM space and file space for a given file.
406
407 struct DualMap {
408 RangeMap vm_map;
409 RangeMap file_map;
410 };
411
412 struct DisassemblyInfo {
413 absl::string_view text;
414 DualMap symbol_map;
415 cs_arch arch;
416 cs_mode mode;
417 uint64_t start_address;
418 };
419
420 std::string DisassembleFunction(const DisassemblyInfo& info);
421 void DisassembleFindReferences(const DisassemblyInfo& info, RangeSink* sink);
422
423 // Top-level API ///////////////////////////////////////////////////////////////
424
425 // This should only be used by main.cc and unit tests.
426
427 class Rollup;
428
// One row of rollup output, together with its (already sorted) child rows.
struct RollupRow {
  RollupRow(const std::string& name_) : name(name_) {}

  std::string name;
  int64_t vmsize = 0;
  int64_t filesize = 0;
  int64_t filtered_vmsize = 0;
  int64_t filtered_filesize = 0;
  int64_t other_count = 0;

  // These three are zero-initialized like the counters above, so that a
  // freshly constructed row never exposes indeterminate values (Compare()
  // reads sortkey).
  int64_t sortkey = 0;
  double vmpercent = 0;
  double filepercent = 0;

  std::vector<RollupRow> sorted_children;

  // Strict weak ordering for sorting rows: larger sortkey first, ties broken
  // by ascending name.
  static bool Compare(const RollupRow& a, const RollupRow& b) {
    if (a.sortkey == b.sortkey) {
      // Sort name low to high.
      return a.name < b.name;
    }
    // Sort value high-to-low.
    return a.sortkey > b.sortkey;
  }
};
452
// Output formats selectable through OutputOptions.
enum class OutputFormat {
  kPrettyPrint,  // Human-readable output.
  kCSV,          // Comma-separated values.
  kTSV,          // Tab-separated values.
};
458
// Which size domain(s) to show in the output: file, VM, or both.
enum class ShowDomain {
  kShowFile,
  kShowVM,
  kShowBoth,
};
464
465 struct OutputOptions {
466 OutputFormat output_format = OutputFormat::kPrettyPrint;
467 size_t max_label_len = 80;
468 ShowDomain show = ShowDomain::kShowBoth;
469 };
470
471 struct RollupOutput {
472 public:
RollupOutputRollupOutput473 RollupOutput() : toplevel_row_("TOTAL") {}
474
AddDataSourceNameRollupOutput475 void AddDataSourceName(absl::string_view name) {
476 source_names_.emplace_back(std::string(name));
477 }
478
source_namesRollupOutput479 const std::vector<std::string>& source_names() const { return source_names_; }
480
PrintRollupOutput481 void Print(const OutputOptions& options, std::ostream* out) {
482 if (!source_names_.empty()) {
483 switch (options.output_format) {
484 case bloaty::OutputFormat::kPrettyPrint:
485 PrettyPrint(options, out);
486 break;
487 case bloaty::OutputFormat::kCSV:
488 PrintToCSV(out, /*tabs=*/false);
489 break;
490 case bloaty::OutputFormat::kTSV:
491 PrintToCSV(out, /*tabs=*/true);
492 break;
493 default:
494 BLOATY_UNREACHABLE();
495 }
496 }
497
498 if (!disassembly_.empty()) {
499 *out << disassembly_;
500 }
501 }
502
SetDisassemblyRollupOutput503 void SetDisassembly(absl::string_view disassembly) {
504 disassembly_ = std::string(disassembly);
505 }
506
GetDisassemblyRollupOutput507 absl::string_view GetDisassembly() { return disassembly_; }
508
509 // For debugging.
toplevel_rowRollupOutput510 const RollupRow& toplevel_row() const { return toplevel_row_; }
diff_modeRollupOutput511 bool diff_mode() const { return diff_mode_; }
512
513 private:
514 BLOATY_DISALLOW_COPY_AND_ASSIGN(RollupOutput);
515 friend class Rollup;
516
517 std::vector<std::string> source_names_;
518 RollupRow toplevel_row_;
519 std::string disassembly_;
520
521 // When we are in diff mode, rollup sizes are relative to the baseline.
522 bool diff_mode_ = false;
523
524 static bool IsSame(const std::string& a, const std::string& b);
525 void PrettyPrint(const OutputOptions& options, std::ostream* out) const;
526 void PrintToCSV(std::ostream* out, bool tabs) const;
527 void PrettyPrintRow(const RollupRow& row, size_t indent,
528 const OutputOptions& options, std::ostream* out) const;
529 void PrettyPrintTree(const RollupRow& row, size_t indent,
530 const OutputOptions& options, std::ostream* out) const;
531 void PrintRowToCSV(const RollupRow& row,
532 std::vector<std::string> parent_labels,
533 std::ostream* out, bool tabs) const;
534 void PrintTreeToCSV(const RollupRow& row,
535 std::vector<std::string> parent_labels,
536 std::ostream* out, bool tabs) const;
537 };
538
539 bool ParseOptions(bool skip_unknown, int* argc, char** argv[], Options* options,
540 OutputOptions* output_options, std::string* error);
541 bool BloatyMain(const Options& options, const InputFileFactory& file_factory,
542 RollupOutput* output, std::string* error);
543
544 // Endianness utilities ////////////////////////////////////////////////////////
545
// Returns true if the lowest-addressed byte of an int holding the value 1 is
// itself 1, i.e. the least significant byte is stored first.
inline bool IsLittleEndian() {
  const int probe = 1;
  const char* first_byte = reinterpret_cast<const char*>(&probe);
  return first_byte[0] == 1;
}
550
551 // It seems like it would be simpler to just specialize on:
552 // template <class T> T ByteSwap(T val);
553 // template <> T ByteSwap<uint16>(T val) { /* ... */ }
554 // template <> T ByteSwap<uint32>(T val) { /* ... */ }
555 // // etc...
556 //
557 // But this doesn't work out so well. Consider that on LP32, uint32 could
558 // be either "unsigned int" or "unsigned long". Specializing ByteSwap<uint32>
559 // will leave one of those two unspecialized. C++ is annoying in this regard.
560 // Our approach here handles both cases with just one specialization.
// ByteSwapper<T, N> reverses the byte order of an N-byte integer value.  The
// primary template only declares operator(), so using an unsupported size
// fails at link time.
//
// Each specialization swaps on a fixed-width *unsigned* copy of the value.
// Shifting the signed value directly (for example (val & 0xff) << 24 on an
// int32_t) can shift a bit into the sign position, which is signed overflow.
template <class T, size_t size> struct ByteSwapper { T operator()(T val); };

// A single byte has no byte order to reverse.
template <class T>
struct ByteSwapper<T, 1> {
  T operator()(T val) { return val; }
};

template <class T>
struct ByteSwapper<T, 2> {
  T operator()(T val) {
    static_assert(std::is_integral<T>::value || std::is_enum<T>::value,
                  "ByteSwap requires an integer type");
    const uint16_t u = static_cast<uint16_t>(val);
    // |u| promotes to int here; the result always fits before truncation.
    return static_cast<T>(static_cast<uint16_t>((u << 8) | (u >> 8)));
  }
};

template <class T>
struct ByteSwapper<T, 4> {
  T operator()(T val) {
    static_assert(std::is_integral<T>::value || std::is_enum<T>::value,
                  "ByteSwap requires an integer type");
    const uint32_t u = static_cast<uint32_t>(val);
    return static_cast<T>(((u & 0x000000ffU) << 24) |
                          ((u & 0x0000ff00U) << 8) |
                          ((u & 0x00ff0000U) >> 8) |
                          ((u & 0xff000000U) >> 24));
  }
};

template <class T>
struct ByteSwapper<T, 8> {
  T operator()(T val) {
    static_assert(std::is_integral<T>::value || std::is_enum<T>::value,
                  "ByteSwap requires an integer type");
    const uint64_t u = static_cast<uint64_t>(val);
    return static_cast<T>(((u & 0x00000000000000ffULL) << 56) |
                          ((u & 0x000000000000ff00ULL) << 40) |
                          ((u & 0x0000000000ff0000ULL) << 24) |
                          ((u & 0x00000000ff000000ULL) << 8) |
                          ((u & 0x000000ff00000000ULL) >> 8) |
                          ((u & 0x0000ff0000000000ULL) >> 24) |
                          ((u & 0x00ff000000000000ULL) >> 40) |
                          ((u & 0xff00000000000000ULL) >> 56));
  }
};

// Returns |val| with its bytes in reverse order, dispatching on sizeof(T).
template <class T>
T ByteSwap(T val) { return ByteSwapper<T, sizeof(T)>()(val); }
602
603 } // namespace bloaty
604
605 #endif
606