1 // Copyright 2016 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // This file contains APIs for use within Bloaty.  None of these APIs have any
16 // guarantees whatsoever about their stability!  The public API for bloaty is
17 // its command-line interface.
18 
19 #ifndef BLOATY_H_
20 #define BLOATY_H_
21 
#include <stdlib.h>
#define __STDC_LIMIT_MACROS
#define __STDC_FORMAT_MACROS
#include <stdint.h>
#include <inttypes.h>
#include <assert.h>
#include <stdio.h>

#include <functional>
#include <map>
#include <memory>
#include <ostream>
#include <set>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "capstone/capstone.h"
#include "re2/re2.h"

#include "bloaty.pb.h"
#include "range_map.h"
41 
// Makes |class_name| non-copyable by deleting its copy constructor and its
// copy-assignment operator.  Place inside the class body.
#define BLOATY_DISALLOW_COPY_AND_ASSIGN(class_name) \
  class_name(const class_name&) = delete;           \
  class_name& operator=(const class_name&) = delete;
45 
// Marks a code path that must never execute.  With assertions enabled the
// assert() fires.  With NDEBUG the compiler is told the path is unreachable,
// so actually reaching it is undefined behavior.
//
// __builtin_unreachable() is a GCC/Clang builtin; MSVC spells the same hint
// __assume(0).
#if defined(_MSC_VER)
#define BLOATY_UNREACHABLE() do { \
  assert(false); \
  __assume(0); \
} while (0)
#else
#define BLOATY_UNREACHABLE() do { \
  assert(false); \
  __builtin_unreachable(); \
} while (0)
#endif
50 
// BLOATY_ASSERT(expr) is assert(expr) when assertions are enabled.  Under
// NDEBUG the expression is still compiled, which prevents "unused variable"
// warnings for variables referenced only in assertions, but it is never
// evaluated because it sits behind "false &&".
#ifndef NDEBUG
#define BLOATY_ASSERT(expr) assert(expr)
#else
#define BLOATY_ASSERT(expr) do {} while (false && (expr))
#endif
57 
58 namespace bloaty {
59 
// Global verbosity level.  Only declared here; the definition lives in another
// translation unit.
extern int verbose_level;

// Forward declarations, so the declarations below can refer to these types by
// pointer or reference before their full definitions have been seen.
class NameMunger;
class Options;
struct DualMap;
struct DisassemblyInfo;
66 
// The kinds of data source that can be attached to a RangeSink.
enum class DataSource {
  kArchiveMembers,
  kCompileUnits,
  kInlines,
  kInputFiles,
  kRawRanges,
  kSections,
  kSegments,

  // Generic "symbols" selector.  It is always replaced by one of the concrete
  // symbol types below before being set on a sink.
  kSymbols,

  // Concrete symbol types.
  kRawSymbols,
  kFullSymbols,
  kShortSymbols,
};
84 
// Exception type carrying, in addition to the usual what() message, the source
// file name and line number supplied by the code that constructed it.
//
// |file| is stored as a raw pointer and is not copied, so it must outlive the
// exception object.
class Error : public std::runtime_error {
 public:
  Error(const char* msg, const char* file, int line)
      : std::runtime_error{msg}, file_{file}, line_{line} {}

  // TODO(haberman): add these to Bloaty's error message when verbose is
  // enabled.
  const char* file() const {
    return file_;
  }
  int line() const {
    return line_;
  }

 private:
  const char* file_;  // Not owned.
  int line_;
};
99 
100 class InputFile {
101  public:
InputFile(const std::string & filename)102   InputFile(const std::string& filename) : filename_(filename) {}
~InputFile()103   virtual ~InputFile() {}
104 
filename()105   const std::string& filename() const { return filename_; }
data()106   absl::string_view data() const { return data_; }
107 
108  private:
109   BLOATY_DISALLOW_COPY_AND_ASSIGN(InputFile);
110   const std::string filename_;
111 
112  protected:
113   absl::string_view data_;
114 };
115 
116 class InputFileFactory {
117  public:
~InputFileFactory()118   virtual ~InputFileFactory() {}
119 
120   // Throws if the file could not be opened.
121   virtual std::unique_ptr<InputFile> OpenFile(
122       const std::string& filename) const = 0;
123 };
124 
// Concrete InputFileFactory.  OpenFile() is defined elsewhere.
// NOTE(review): judging by the name, it presumably mmap()s the file -- confirm
// against the implementation.
class MmapInputFileFactory : public InputFileFactory {
 public:
  std::unique_ptr<InputFile> OpenFile(
      const std::string& filename) const override;
};
130 
131 // NOTE: all sizes are uint64, even on 32-bit platforms:
132 //   - 32-bit platforms can have files >4GB in some cases.
133 //   - for object files (not executables/shared libs) we pack both a section
134 //     index and an address into the "vmaddr" value, and we need enough bits to
135 //     safely do this.
136 
137 // A RangeSink allows data sources to assign labels to ranges of VM address
138 // space and/or file offsets.
139 class RangeSink {
140  public:
141   RangeSink(const InputFile* file, const Options& options,
142             DataSource data_source, const DualMap* translator);
143   ~RangeSink();
144 
options()145   const Options& options() const { return options_; }
146 
147   void AddOutput(DualMap* map, const NameMunger* munger);
148 
data_source()149   DataSource data_source() const { return data_source_; }
input_file()150   const InputFile& input_file() const { return *file_; }
IsBaseMap()151   bool IsBaseMap() const { return translator_ == nullptr; }
152 
153   // If vmsize or filesize is zero, this mapping is presumed not to exist in
154   // that domain.  For example, .bss mappings don't exist in the file, and
155   // .debug_* mappings don't exist in memory.
156   void AddRange(const char* analyzer, absl::string_view name, uint64_t vmaddr,
157                 uint64_t vmsize, uint64_t fileoff, uint64_t filesize);
158 
AddRange(const char * analyzer,absl::string_view name,uint64_t vmaddr,uint64_t vmsize,absl::string_view file_range)159   void AddRange(const char* analyzer, absl::string_view name, uint64_t vmaddr,
160                 uint64_t vmsize, absl::string_view file_range) {
161     AddRange(analyzer, name, vmaddr, vmsize,
162              file_range.data() - file_->data().data(), file_range.size());
163   }
164 
165   void AddFileRange(const char* analyzer, absl::string_view name,
166                     uint64_t fileoff, uint64_t filesize);
167 
168   // Like AddFileRange(), but the label is whatever label was previously
169   // assigned to VM address |label_from_vmaddr|.  If no existing label is
170   // assigned to |label_from_vmaddr|, this function does nothing.
171   void AddFileRangeForVMAddr(const char* analyzer, uint64_t label_from_vmaddr,
172                              absl::string_view file_range);
173   void AddVMRangeForVMAddr(const char* analyzer, uint64_t label_from_vmaddr,
174                            uint64_t addr, uint64_t size);
175 
176   // Applies this label from |from_file_range| to |file_range|, but only if the
177   // entire |from_file_range| has a single label.  If not, this does nothing.
178   void AddFileRangeForFileRange(const char* analyzer,
179                                 absl::string_view from_file_range,
180                                 absl::string_view file_range);
181 
AddFileRange(const char * analyzer,absl::string_view name,absl::string_view file_range)182   void AddFileRange(const char* analyzer, absl::string_view name,
183                     absl::string_view file_range) {
184     // When separate debug files are being used, the DWARF analyzer will try to
185     // add sections of the debug file.  We want to prevent this because we only
186     // want to profile the main file (not the debug file), so we filter these
187     // out.  This approach is simple to implement, but does result in some
188     // useless work being done.  We may want to avoid doing this useless work in
189     // the first place.
190     if (FileContainsPointer(file_range.data())) {
191       AddFileRange(analyzer, name, file_range.data() - file_->data().data(),
192                    file_range.size());
193     }
194   }
195 
196   // The VM-only functions below may not be used to populate the base map!
197 
198   // Adds a region to the memory map.  It should not overlap any previous
199   // region added with Add(), but it should overlap the base memory map.
200   void AddVMRange(const char* analyzer, uint64_t vmaddr, uint64_t vmsize,
201                   const std::string& name);
202 
203   // Like Add(), but allows that this addr/size might have previously been added
204   // already under a different name.  If so, this name becomes an alias of the
205   // previous name.
206   //
207   // This is for things like symbol tables that sometimes map multiple names to
208   // the same physical function.
209   void AddVMRangeAllowAlias(const char* analyzer, uint64_t vmaddr,
210                             uint64_t size, const std::string& name);
211 
212   // Like Add(), but allows that this addr/size might have previously been added
213   // already under a different name.  If so, this add is simply ignored.
214   //
215   // This is for cases like sourcefiles.  Sometimes a single function appears to
216   // come from multiple source files.  But if it does, we don't want to alias
217   // the entire source file to another, because it's probably only part of the
218   // source file that overlaps.
219   void AddVMRangeIgnoreDuplicate(const char* analyzer, uint64_t vmaddr,
220                                  uint64_t size, const std::string& name);
221 
MapAtIndex(size_t index)222   const DualMap& MapAtIndex(size_t index) const {
223     return *outputs_[index].first;
224   }
225 
226   // Translates the given pointer (which must be within the range of
227   // input_file().data()) to a VM address.
228   uint64_t TranslateFileToVM(const char* ptr);
229   absl::string_view TranslateVMToFile(uint64_t address);
230 
231   static constexpr uint64_t kUnknownSize = RangeMap::kUnknownSize;
232 
233  private:
234   BLOATY_DISALLOW_COPY_AND_ASSIGN(RangeSink);
235 
FileContainsPointer(const void * ptr)236   bool FileContainsPointer(const void* ptr) const {
237     absl::string_view file_data = file_->data();
238     return ptr >= file_data.data() && ptr < file_data.data() + file_data.size();
239   }
240 
241   bool ContainsVerboseVMAddr(uint64_t vmaddr, uint64_t vmsize);
242   bool ContainsVerboseFileOffset(uint64_t fileoff, uint64_t filesize);
243   bool IsVerboseForVMRange(uint64_t vmaddr, uint64_t vmsize);
244   bool IsVerboseForFileRange(uint64_t fileoff, uint64_t filesize);
245 
246   const InputFile* file_;
247   const Options options_;
248   DataSource data_source_;
249   const DualMap* translator_;
250   std::vector<std::pair<DualMap*, const NameMunger*>> outputs_;
251 };
252 
253 
254 // NameMunger //////////////////////////////////////////////////////////////////
255 
256 // Use to transform input names according to the user's configuration.
257 // For example, the user can use regexes.
258 class NameMunger {
259  public:
NameMunger()260   NameMunger() {}
261 
262   // Adds a regex that will be applied to all names.  All regexes will be
263   // applied in sequence.
264   void AddRegex(const std::string& regex, const std::string& replacement);
265 
266   std::string Munge(absl::string_view name) const;
267 
IsEmpty()268   bool IsEmpty() const { return regexes_.empty(); }
269 
270  private:
271   BLOATY_DISALLOW_COPY_AND_ASSIGN(NameMunger);
272   std::vector<std::pair<std::unique_ptr<RE2>, std::string>> regexes_;
273 };
274 
275 typedef std::map<absl::string_view, std::pair<uint64_t, uint64_t>> SymbolTable;
276 
277 // Represents an object/executable file in a format like ELF, Mach-O, PE, etc.
278 // To support a new file type, implement this interface.
279 class ObjectFile {
280  public:
ObjectFile(std::unique_ptr<InputFile> file_data)281   ObjectFile(std::unique_ptr<InputFile> file_data)
282       : file_data_(std::move(file_data)), debug_file_(this) {}
~ObjectFile()283   virtual ~ObjectFile() {}
284 
285   virtual std::string GetBuildId() const = 0;
286 
287   // Process this file, pushing data to |sinks| as appropriate for each data
288   // source.  If any debug files match the build id for this file, it will be
289   // given here, otherwise it is |this|.
290   virtual void ProcessFile(const std::vector<RangeSink*>& sinks) const = 0;
291 
292   virtual bool GetDisassemblyInfo(absl::string_view symbol,
293                                   DataSource symbol_source,
294                                   DisassemblyInfo* info) const = 0;
295 
file_data()296   const InputFile& file_data() const { return *file_data_; }
297 
298   // Sets the debug file for |this|.  |file| must outlive this instance.
set_debug_file(const ObjectFile * file)299   void set_debug_file(const ObjectFile* file) {
300     assert(debug_file_->GetBuildId() == GetBuildId());
301     debug_file_ = file;
302   }
303 
debug_file()304   const ObjectFile& debug_file() const { return *debug_file_; }
305 
306  private:
307   std::unique_ptr<InputFile> file_data_;
308   const ObjectFile* debug_file_;
309 };
310 
// Format-specific openers, defined elsewhere.  Each receives the InputFile as
// a non-const reference to the owning unique_ptr, so it is able to take
// ownership of |file|.
// NOTE(review): presumably each returns null and leaves |file| untouched when
// the data is not in its format -- confirm against the implementations.
std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenMachOFile(std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(std::unique_ptr<InputFile>& file);
314 
315 namespace dwarf {
316 
// Bundle of views passed to the ReadDWARF*() functions.  Each member is named
// after a DWARF section (e.g. debug_info for ".debug_info").  The views do not
// own the bytes they refer to.
// NOTE(review): presumably a member is left empty when the corresponding
// section is absent -- confirm with the code that fills this in.
struct File {
  absl::string_view debug_info;
  absl::string_view debug_types;
  absl::string_view debug_str;
  absl::string_view debug_abbrev;
  absl::string_view debug_aranges;
  absl::string_view debug_line;
  absl::string_view debug_loc;
  absl::string_view debug_pubnames;
  absl::string_view debug_pubtypes;
  absl::string_view debug_ranges;
};
329 
330 }  // namespace dwarf
331 
// Provided by dwarf.cc.  To use these, a module should fill in a dwarf::File
// and then call these functions.  Each takes the RangeSink to operate on as
// |sink|.
void ReadDWARFCompileUnits(const dwarf::File& file, const SymbolTable& symtab,
                           const DualMap& map, RangeSink* sink);
void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink,
                      bool include_line);

// NOTE(review): judging by the names, these presumably parse the contents of
// the .eh_frame and .eh_frame_hdr sections -- confirm where they are defined.
void ReadEhFrame(absl::string_view contents, RangeSink* sink);
void ReadEhFrameHdr(absl::string_view contents, RangeSink* sink);
340 
341 
342 // LineReader //////////////////////////////////////////////////////////////////
343 
344 // Provides range-based for to iterate over lines in a pipe.
345 //
346 // for ( auto& line : ReadLinesFromPipe("ls -l") ) {
347 // }
348 
349 class LineIterator;
350 
351 class LineReader {
352  public:
LineReader(FILE * file,bool pclose)353   LineReader(FILE* file, bool pclose) : file_(file), pclose_(pclose) {}
354   LineReader(LineReader&& other);
355 
~LineReader()356   ~LineReader() { Close(); }
357 
358   LineIterator begin();
359   LineIterator end();
360 
361   void Next();
362 
line()363   const std::string& line() const { return line_; }
eof()364   bool eof() { return eof_; }
365 
366  private:
367   BLOATY_DISALLOW_COPY_AND_ASSIGN(LineReader);
368 
369   void Close();
370 
371   FILE* file_;
372   std::string line_;
373   bool eof_ = false;
374   bool pclose_;
375 };
376 
377 class LineIterator {
378  public:
LineIterator(LineReader * reader)379   LineIterator(LineReader* reader) : reader_(reader) {}
380 
381   bool operator!=(const LineIterator& /*other*/) const {
382     // Hack for range-based for.
383     return !reader_->eof();
384   }
385 
386   void operator++() { reader_->Next(); }
387 
388   const std::string& operator*() const {
389     return reader_->line();
390   }
391 
392  private:
393   LineReader* reader_;
394 };
395 
// Returns a LineReader for iterating over the lines produced by |cmd|; see the
// usage example above LineReader.
// NOTE(review): presumably implemented with popen() -- confirm.
LineReader ReadLinesFromPipe(const std::string& cmd);

// Demangle C++ symbols according to the Itanium ABI.  The |source| argument
// controls what demangling mode we are using.
std::string ItaniumDemangle(absl::string_view symbol, DataSource source);
401 
402 
403 // DualMap /////////////////////////////////////////////////////////////////////
404 
405 // Contains a RangeMap for VM space and file space for a given file.
406 
struct DualMap {
  RangeMap vm_map;    // Ranges in VM address space.
  RangeMap file_map;  // Ranges in file space.
};
411 
// Input for DisassembleFunction() and DisassembleFindReferences().
// NOTE(review): the field meanings below are inferred from names and types
// only -- confirm against the code that fills this struct in.  arch, mode and
// start_address have no default initializers.
struct DisassemblyInfo {
  absl::string_view text;  // Presumably the bytes to disassemble.
  DualMap symbol_map;      // Presumably used to label addresses.
  cs_arch arch;            // Capstone architecture.
  cs_mode mode;            // Capstone mode.
  uint64_t start_address;  // Presumably the address of the start of |text|.
};
419 
// Both are defined elsewhere and take the DisassemblyInfo describing the code.
// DisassembleFunction() hands its result back as a string.
// DisassembleFindReferences() takes a RangeSink instead.
// NOTE(review): presumably it reports the references it finds to |sink| --
// confirm.
std::string DisassembleFunction(const DisassemblyInfo& info);
void DisassembleFindReferences(const DisassemblyInfo& info, RangeSink* sink);
422 
423 // Top-level API ///////////////////////////////////////////////////////////////
424 
425 // This should only be used by main.cc and unit tests.
426 
427 class Rollup;
428 
// One row of rollup output: a label, its sizes, and its child rows.
//
// Every numeric field starts at zero, so a freshly constructed row can be
// passed to Compare() or printed without reading indeterminate values.
struct RollupRow {
  RollupRow(const std::string& name_) : name(name_) {}

  std::string name;
  int64_t vmsize = 0;
  int64_t filesize = 0;
  int64_t filtered_vmsize = 0;
  int64_t filtered_filesize = 0;
  int64_t other_count = 0;
  int64_t sortkey = 0;
  double vmpercent = 0;
  double filepercent = 0;
  std::vector<RollupRow> sorted_children;

  // Strict weak ordering for sorting rows: larger sortkey first, with ties
  // broken by ascending name.  Returns true if |a| should come before |b|.
  static bool Compare(const RollupRow& a, const RollupRow& b) {
    // Sort value high-to-low.
    if (a.sortkey != b.sortkey) {
      return a.sortkey > b.sortkey;
    }
    // Sort name low to high.
    return a.name < b.name;
  }
};
452 
// Selects how RollupOutput::Print() renders the report.
enum class OutputFormat {
  kPrettyPrint,  // Rendered by PrettyPrint().
  kCSV,          // Rendered by PrintToCSV() with tabs disabled.
  kTSV,          // Rendered by PrintToCSV() with tabs enabled.
};
458 
// Selects which size domain(s) to show.
// NOTE(review): presumably "file" and "VM" correspond to the filesize and
// vmsize columns of a RollupRow -- confirm in the printing code.
enum class ShowDomain {
  kShowFile,
  kShowVM,
  kShowBoth,
};
464 
465 struct OutputOptions {
466   OutputFormat output_format = OutputFormat::kPrettyPrint;
467   size_t max_label_len = 80;
468   ShowDomain show = ShowDomain::kShowBoth;
469 };
470 
471 struct RollupOutput {
472  public:
RollupOutputRollupOutput473   RollupOutput() : toplevel_row_("TOTAL") {}
474 
AddDataSourceNameRollupOutput475   void AddDataSourceName(absl::string_view name) {
476     source_names_.emplace_back(std::string(name));
477   }
478 
source_namesRollupOutput479   const std::vector<std::string>& source_names() const { return source_names_; }
480 
PrintRollupOutput481   void Print(const OutputOptions& options, std::ostream* out) {
482     if (!source_names_.empty()) {
483       switch (options.output_format) {
484         case bloaty::OutputFormat::kPrettyPrint:
485           PrettyPrint(options, out);
486           break;
487         case bloaty::OutputFormat::kCSV:
488           PrintToCSV(out, /*tabs=*/false);
489           break;
490         case bloaty::OutputFormat::kTSV:
491           PrintToCSV(out, /*tabs=*/true);
492           break;
493         default:
494           BLOATY_UNREACHABLE();
495       }
496     }
497 
498     if (!disassembly_.empty()) {
499       *out << disassembly_;
500     }
501   }
502 
SetDisassemblyRollupOutput503   void SetDisassembly(absl::string_view disassembly) {
504     disassembly_ = std::string(disassembly);
505   }
506 
GetDisassemblyRollupOutput507   absl::string_view GetDisassembly() { return disassembly_; }
508 
509   // For debugging.
toplevel_rowRollupOutput510   const RollupRow& toplevel_row() const { return toplevel_row_; }
diff_modeRollupOutput511   bool diff_mode() const { return diff_mode_; }
512 
513  private:
514   BLOATY_DISALLOW_COPY_AND_ASSIGN(RollupOutput);
515   friend class Rollup;
516 
517   std::vector<std::string> source_names_;
518   RollupRow toplevel_row_;
519   std::string disassembly_;
520 
521   // When we are in diff mode, rollup sizes are relative to the baseline.
522   bool diff_mode_ = false;
523 
524   static bool IsSame(const std::string& a, const std::string& b);
525   void PrettyPrint(const OutputOptions& options, std::ostream* out) const;
526   void PrintToCSV(std::ostream* out, bool tabs) const;
527   void PrettyPrintRow(const RollupRow& row, size_t indent,
528                       const OutputOptions& options, std::ostream* out) const;
529   void PrettyPrintTree(const RollupRow& row, size_t indent,
530                        const OutputOptions& options, std::ostream* out) const;
531   void PrintRowToCSV(const RollupRow& row,
532                      std::vector<std::string> parent_labels,
533                      std::ostream* out, bool tabs) const;
534   void PrintTreeToCSV(const RollupRow& row,
535                       std::vector<std::string> parent_labels,
536                       std::ostream* out, bool tabs) const;
537 };
538 
// Entry points for main.cc and unit tests; both are defined elsewhere.  Each
// returns bool and takes a std::string* |error|.
// NOTE(review): presumably they return false and fill in |*error| on failure
// -- confirm against the implementations.
bool ParseOptions(bool skip_unknown, int* argc, char** argv[], Options* options,
                  OutputOptions* output_options, std::string* error);
bool BloatyMain(const Options& options, const InputFileFactory& file_factory,
                RollupOutput* output, std::string* error);
543 
544 // Endianness utilities ////////////////////////////////////////////////////////
545 
// Returns true when the host is little-endian, i.e. when the byte at the
// lowest address of an int holding the value 1 is itself 1.
inline bool IsLittleEndian() {
  const int probe = 1;
  // Inspecting an object's bytes through a char pointer is permitted by the
  // aliasing rules.
  const char* const first_byte = reinterpret_cast<const char*>(&probe);
  return first_byte[0] == 1;
}
550 
551 // It seems like it would be simpler to just specialize on:
552 //   template <class T> T ByteSwap(T val);
553 //   template <> T ByteSwap<uint16>(T val) { /* ... */ }
554 //   template <> T ByteSwap<uint32>(T val) { /* ... */ }
555 //   // etc...
556 //
557 // But this doesn't work out so well.  Consider that on LP32, uint32 could
558 // be either "unsigned int" or "unsigned long".  Specializing ByteSwap<uint32>
559 // will leave one of those two unspecialized.  C++ is annoying in this regard.
560 // Our approach here handles both cases with just one specialization.
// Primary template.  Only the partial specializations for sizes 1, 2, 4 and 8
// below define operator(); this one merely declares it.
template <class T, size_t size>
struct ByteSwapper {
  T operator()(T val);
};

// One byte: nothing to reorder.
template <class T>
struct ByteSwapper<T, 1> {
  T operator()(T val) { return val; }
};

// Two bytes: exchange the low and high byte.
template <class T>
struct ByteSwapper<T, 2> {
  T operator()(T val) {
    const auto byte0 = (val & 0xff) << 8;
    const auto byte1 = (val & 0xff00) >> 8;
    return byte0 | byte1;
  }
};

// Four bytes: byte i moves to position 3 - i.
template <class T>
struct ByteSwapper<T, 4> {
  T operator()(T val) {
    const auto byte0 = (val & 0xff) << 24;
    const auto byte1 = (val & 0xff00) << 8;
    const auto byte2 = (val & 0xff0000ULL) >> 8;
    const auto byte3 = (val & 0xff000000ULL) >> 24;
    return byte0 | byte1 | byte2 | byte3;
  }
};

// Eight bytes: byte i moves to position 7 - i.
template <class T>
struct ByteSwapper<T, 8> {
  T operator()(T val) {
    const auto byte0 = (val & 0xff) << 56;
    const auto byte1 = (val & 0xff00) << 40;
    const auto byte2 = (val & 0xff0000) << 24;
    const auto byte3 = (val & 0xff000000) << 8;
    const auto byte4 = (val & 0xff00000000ULL) >> 8;
    const auto byte5 = (val & 0xff0000000000ULL) >> 24;
    const auto byte6 = (val & 0xff000000000000ULL) >> 40;
    const auto byte7 = (val & 0xff00000000000000ULL) >> 56;
    return byte0 | byte1 | byte2 | byte3 | byte4 | byte5 | byte6 | byte7;
  }
};

// Reverses the byte order of |val|, dispatching on sizeof(T).
template <class T>
T ByteSwap(T val) {
  ByteSwapper<T, sizeof(T)> swapper;
  return swapper(val);
}
602 
603 }  // namespace bloaty
604 
605 #endif
606