// -*- mode: C++ -*-

// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// macho_reader.h: A class for parsing Mach-O files.

#ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_
#define BREAKPAD_COMMON_MAC_MACHO_READER_H_

#include <mach-o/loader.h>
#include <mach-o/fat.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

#include <map>
#include <string>
#include <vector>

#include "common/byte_cursor.h"
#include "common/mac/super_fat_arch.h"

namespace google_breakpad {
namespace mach_o {

using std::map;
using std::string;
using std::vector;

// The Mac headers don't specify particular types for these groups of
// constants, but defining them here provides some documentation
// value.  We also give them the same width as the fields in which
// they appear, which makes them a bit easier to use with ByteCursors.
//
// Every alias below is a plain 32-bit unsigned integer; the distinct names
// exist only to document which group of constants a value belongs to.
typedef uint32_t Magic;            // A file's magic number.
typedef uint32_t FileType;         // A Mach-O header's file type.
typedef uint32_t FileFlags;        // A Mach-O header's flags.
typedef uint32_t LoadCommandType;  // A load command's type.
typedef uint32_t SegmentFlags;     // A segment's flags.
typedef uint32_t SectionFlags;     // A section's flags.

70 // A parser for fat binary files, used to store universal binaries.
71 // When applied to a (non-fat) Mach-O file, this behaves as if the
72 // file were a fat file containing a single object file.
73 class FatReader {
74  public:
75 
76   // A class for reporting errors found while parsing fat binary files. The
77   // default definitions of these methods print messages to stderr.
78   class Reporter {
79    public:
80     // Create a reporter that attributes problems to |filename|.
Reporter(const string & filename)81     explicit Reporter(const string &filename) : filename_(filename) { }
82 
~Reporter()83     virtual ~Reporter() { }
84 
85     // The data does not begin with a fat binary or Mach-O magic number.
86     // This is a fatal error.
87     virtual void BadHeader();
88 
89     // The Mach-O fat binary file ends abruptly, without enough space
90     // to contain an object file it claims is present.
91     virtual void MisplacedObjectFile();
92 
93     // The file ends abruptly: either it is not large enough to hold a
94     // complete header, or the header implies that contents are present
95     // beyond the actual end of the file.
96     virtual void TooShort();
97 
98    private:
99     // The filename to which the reader should attribute problems.
100     string filename_;
101   };
102 
103   // Create a fat binary file reader that uses |reporter| to report problems.
FatReader(Reporter * reporter)104   explicit FatReader(Reporter *reporter) : reporter_(reporter) { }
105 
106   // Read the |size| bytes at |buffer| as a fat binary file. On success,
107   // return true; on failure, report the problem to reporter_ and return
108   // false.
109   //
110   // If the data is a plain Mach-O file, rather than a fat binary file,
111   // then the reader behaves as if it had found a fat binary file whose
112   // single object file is the Mach-O file.
113   bool Read(const uint8_t *buffer, size_t size);
114 
115   // Return an array of 'SuperFatArch' structures describing the
116   // object files present in this fat binary file. Set |size| to the
117   // number of elements in the array.
118   //
119   // Assuming Read returned true, the entries are validated: it is safe to
120   // assume that the offsets and sizes in each SuperFatArch refer to subranges
121   // of the bytes passed to Read.
122   //
123   // If there are no object files in this fat binary, then this
124   // function can return NULL.
125   //
126   // The array is owned by this FatReader instance; it will be freed when
127   // this FatReader is destroyed.
128   //
129   // This function returns a C-style array instead of a vector to make it
130   // possible to use the result with OS X functions like NXFindBestFatArch,
131   // so that the symbol dumper will behave consistently with other OS X
132   // utilities that work with fat binaries.
object_files(size_t * count)133   const SuperFatArch* object_files(size_t *count) const {
134     *count = object_files_.size();
135     if (object_files_.size() > 0)
136       return &object_files_[0];
137     return NULL;
138   }
139 
140  private:
141   // We use this to report problems parsing the file's contents. (WEAK)
142   Reporter *reporter_;
143 
144   // The contents of the fat binary or Mach-O file we're parsing. We do not
145   // own the storage it refers to.
146   ByteBuffer buffer_;
147 
148   // The magic number of this binary, in host byte order.
149   Magic magic_;
150 
151   // The list of object files in this binary.
152   // object_files_.size() == fat_header.nfat_arch
153   vector<SuperFatArch> object_files_;
154 };
155 
156 // A segment in a Mach-O file. All these fields have been byte-swapped as
157 // appropriate for use by the executing architecture.
158 struct Segment {
159   // The ByteBuffers below point into the bytes passed to the Reader that
160   // created this Segment.
161 
162   ByteBuffer section_list;    // This segment's section list.
163   ByteBuffer contents;        // This segment's contents.
164 
165   // This segment's name.
166   string name;
167 
168   // The address at which this segment should be loaded in memory. If
169   // bits_64 is false, only the bottom 32 bits of this value are valid.
170   uint64_t vmaddr;
171 
172   // The size of this segment when loaded into memory. This may be larger
173   // than contents.Size(), in which case the extra area will be
174   // initialized with zeros. If bits_64 is false, only the bottom 32 bits
175   // of this value are valid.
176   uint64_t vmsize;
177 
178   // The file offset and size of the segment in the Mach-O image.
179   uint64_t fileoff;
180   uint64_t filesize;
181 
182   // The maximum and initial VM protection of this segment's contents.
183   uint32_t maxprot;
184   uint32_t initprot;
185 
186   // The number of sections in section_list.
187   uint32_t nsects;
188 
189   // Flags describing this segment, from SegmentFlags.
190   uint32_t flags;
191 
192   // True if this is a 64-bit section; false if it is a 32-bit section.
193   bool bits_64;
194 };
195 
196 // A section in a Mach-O file. All these fields have been byte-swapped as
197 // appropriate for use by the executing architecture.
198 struct Section {
199   // This section's contents. This points into the bytes passed to the
200   // Reader that created this Section.
201   ByteBuffer contents;
202 
203   // This section's name.
204   string section_name;  // section[_64].sectname
205   // The name of the segment this section belongs to.
206   string segment_name;  // section[_64].segname
207 
208   // The address at which this section's contents should be loaded in
209   // memory. If bits_64 is false, only the bottom 32 bits of this value
210   // are valid.
211   uint64_t address;
212 
213   // The contents of this section should be loaded into memory at an
214   // address which is a multiple of (two raised to this power).
215   uint32_t align;
216 
217   // Flags from SectionFlags describing the section's contents.
218   uint32_t flags;
219 
220   // We don't support reading relocations yet.
221 
222   // True if this is a 64-bit section; false if it is a 32-bit section.
223   bool bits_64;
224 };
225 
226 // A map from section names to Sections.
227 typedef map<string, Section> SectionMap;
228 
229 // A reader for a Mach-O file.
230 //
231 // This does not handle fat binaries; see FatReader above. FatReader
232 // provides a friendly interface for parsing data that could be either a
233 // fat binary or a Mach-O file.
234 class Reader {
235  public:
236 
237   // A class for reporting errors found while parsing Mach-O files. The
238   // default definitions of these member functions print messages to
239   // stderr.
240   class Reporter {
241    public:
242     // Create a reporter that attributes problems to |filename|.
Reporter(const string & filename)243     explicit Reporter(const string &filename) : filename_(filename) { }
~Reporter()244     virtual ~Reporter() { }
245 
246     // Reporter functions for fatal errors return void; the reader will
247     // definitely return an error to its caller after calling them
248 
249     // The data does not begin with a Mach-O magic number, or the magic
250     // number does not match the expected value for the cpu architecture.
251     // This is a fatal error.
252     virtual void BadHeader();
253 
254     // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|)
255     // does not match the expected CPU architecture
256     // (|expected_cpu_type|, |expected_cpu_subtype|).
257     virtual void CPUTypeMismatch(cpu_type_t cpu_type,
258                                  cpu_subtype_t cpu_subtype,
259                                  cpu_type_t expected_cpu_type,
260                                  cpu_subtype_t expected_cpu_subtype);
261 
262     // The file ends abruptly: either it is not large enough to hold a
263     // complete header, or the header implies that contents are present
264     // beyond the actual end of the file.
265     virtual void HeaderTruncated();
266 
267     // The file's load command region, as given in the Mach-O header, is
268     // too large for the file.
269     virtual void LoadCommandRegionTruncated();
270 
271     // The file's Mach-O header claims the file contains |claimed| load
272     // commands, but the I'th load command, of type |type|, extends beyond
273     // the end of the load command region, as given by the Mach-O header.
274     // If |type| is zero, the command's type was unreadable.
275     virtual void LoadCommandsOverrun(size_t claimed, size_t i,
276                                      LoadCommandType type);
277 
278     // The contents of the |i|'th load command, of type |type|, extend beyond
279     // the size given in the load command's header.
280     virtual void LoadCommandTooShort(size_t i, LoadCommandType type);
281 
282     // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named
283     // |name| is too short to hold the sections that its header says it does.
284     // (This more specific than LoadCommandTooShort.)
285     virtual void SectionsMissing(const string &name);
286 
287     // The segment named |name| claims that its contents lie beyond the end
288     // of the file.
289     virtual void MisplacedSegmentData(const string &name);
290 
291     // The section named |section| in the segment named |segment| claims that
292     // its contents do not lie entirely within the segment.
293     virtual void MisplacedSectionData(const string &section,
294                                       const string &segment);
295 
296     // The LC_SYMTAB command claims that symbol table contents are located
297     // beyond the end of the file.
298     virtual void MisplacedSymbolTable();
299 
300     // An attempt was made to read a Mach-O file of the unsupported
301     // CPU architecture |cpu_type|.
302     virtual void UnsupportedCPUType(cpu_type_t cpu_type);
303 
304    private:
305     string filename_;
306   };
307 
308   // A handler for sections parsed from a segment. The WalkSegmentSections
309   // member function accepts an instance of this class, and applies it to
310   // each section defined in a given segment.
311   class SectionHandler {
312    public:
~SectionHandler()313     virtual ~SectionHandler() { }
314 
315     // Called to report that the segment's section list contains |section|.
316     // This should return true if the iteration should continue, or false
317     // if it should stop.
318     virtual bool HandleSection(const Section &section) = 0;
319   };
320 
321   // A handler for the load commands in a Mach-O file.
322   class LoadCommandHandler {
323    public:
LoadCommandHandler()324     LoadCommandHandler() { }
~LoadCommandHandler()325     virtual ~LoadCommandHandler() { }
326 
327     // When called from WalkLoadCommands, the following handler functions
328     // should return true if they wish to continue iterating over the load
329     // command list, or false if they wish to stop iterating.
330     //
331     // When called from LoadCommandIterator::Handle or Reader::Handle,
332     // these functions' return values are simply passed through to Handle's
333     // caller.
334     //
335     // The definitions provided by this base class simply return true; the
336     // default is to silently ignore sections whose member functions the
337     // subclass doesn't override.
338 
339     // COMMAND is load command we don't recognize. We provide only the
340     // command type and a ByteBuffer enclosing the command's data (If we
341     // cannot parse the command type or its size, we call
342     // reporter_->IncompleteLoadCommand instead.)
UnknownCommand(LoadCommandType type,const ByteBuffer & contents)343     virtual bool UnknownCommand(LoadCommandType type,
344                                 const ByteBuffer &contents) {
345       return true;
346     }
347 
348     // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment
349     // with the properties given in |segment|.
SegmentCommand(const Segment & segment)350     virtual bool SegmentCommand(const Segment &segment) {
351       return true;
352     }
353 
354     // The load command is LC_SYMTAB. |entries| holds the array of nlist
355     // entries, and |names| holds the strings the entries refer to.
SymtabCommand(const ByteBuffer & entries,const ByteBuffer & names)356     virtual bool SymtabCommand(const ByteBuffer &entries,
357                                const ByteBuffer &names) {
358       return true;
359     }
360 
361     // Add handler functions for more load commands here as needed.
362   };
363 
364   // Create a Mach-O file reader that reports problems to |reporter|.
Reader(Reporter * reporter)365   explicit Reader(Reporter *reporter)
366       : reporter_(reporter) { }
367 
368   // Read the given data as a Mach-O file. The reader retains pointers
369   // into the data passed, so the data should live as long as the reader
370   // does. On success, return true; on failure, return false.
371   //
372   // At most one of these functions should be invoked once on each Reader
373   // instance.
374   bool Read(const uint8_t *buffer,
375             size_t size,
376             cpu_type_t expected_cpu_type,
377             cpu_subtype_t expected_cpu_subtype);
Read(const ByteBuffer & buffer,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)378   bool Read(const ByteBuffer &buffer,
379             cpu_type_t expected_cpu_type,
380             cpu_subtype_t expected_cpu_subtype) {
381     return Read(buffer.start,
382                 buffer.Size(),
383                 expected_cpu_type,
384                 expected_cpu_subtype);
385   }
386 
387   // Return this file's characteristics, as found in the Mach-O header.
cpu_type()388   cpu_type_t    cpu_type()    const { return cpu_type_; }
cpu_subtype()389   cpu_subtype_t cpu_subtype() const { return cpu_subtype_; }
file_type()390   FileType      file_type()   const { return file_type_; }
flags()391   FileFlags     flags()       const { return flags_; }
392 
393   // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit
394   // Mach-O file.
bits_64()395   bool bits_64() const { return bits_64_; }
396 
397   // Return true if this is a big-endian Mach-O file, false if it is
398   // little-endian.
big_endian()399   bool big_endian() const { return big_endian_; }
400 
401   // Apply |handler| to each load command in this Mach-O file, stopping when
402   // a handler function returns false. If we encounter a malformed load
403   // command, report it via reporter_ and return false. Return true if all
404   // load commands were parseable and all handlers returned true.
405   bool WalkLoadCommands(LoadCommandHandler *handler) const;
406 
407   // Set |segment| to describe the segment named |name|, if present. If
408   // found, |segment|'s byte buffers refer to a subregion of the bytes
409   // passed to Read. If we find the section, return true; otherwise,
410   // return false.
411   bool FindSegment(const string &name, Segment *segment) const;
412 
413   // Apply |handler| to each section defined in |segment|. If |handler| returns
414   // false, stop iterating and return false. If all calls to |handler| return
415   // true and we reach the end of the section list, return true.
416   bool WalkSegmentSections(const Segment &segment, SectionHandler *handler)
417     const;
418 
419   // Clear |section_map| and then populate it with a map of the sections
420   // in |segment|, from section names to Section structures.
421   // Each Section's contents refer to bytes in |segment|'s contents.
422   // On success, return true; if a problem occurs, report it and return false.
423   bool MapSegmentSections(const Segment &segment, SectionMap *section_map)
424     const;
425 
426  private:
427   // Used internally.
428   class SegmentFinder;
429   class SectionMapper;
430 
431   // We use this to report problems parsing the file's contents. (WEAK)
432   Reporter *reporter_;
433 
434   // The contents of the Mach-O file we're parsing. We do not own the
435   // storage it refers to.
436   ByteBuffer buffer_;
437 
438   // True if this file is big-endian.
439   bool big_endian_;
440 
441   // True if this file is a 64-bit Mach-O file.
442   bool bits_64_;
443 
444   // This file's cpu type and subtype.
445   cpu_type_t cpu_type_;        // mach_header[_64].cputype
446   cpu_subtype_t cpu_subtype_;  // mach_header[_64].cpusubtype
447 
448   // This file's type.
449   FileType file_type_;         // mach_header[_64].filetype
450 
451   // The region of buffer_ occupied by load commands.
452   ByteBuffer load_commands_;
453 
454   // The number of load commands in load_commands_.
455   uint32_t load_command_count_;  // mach_header[_64].ncmds
456 
457   // This file's header flags.
458   FileFlags flags_;
459 };
460 
}  // namespace mach_o
}  // namespace google_breakpad

#endif  // BREAKPAD_COMMON_MAC_MACHO_READER_H_