1 // Copyright (c) 2010, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31 
32 // macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and
33 // google_breakpad::Mach_O::Reader. See macho_reader.h for details.
34 
35 #include "common/mac/macho_reader.h"
36 
37 #include <assert.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 
41 #include <limits>
42 
// Unfortunately, CPU_TYPE_ARM is not defined for 10.4, so supply a fallback
// definition when it is missing.
#if !defined(CPU_TYPE_ARM)
#define CPU_TYPE_ARM 12
#endif

// Fallback for when CPU_TYPE_ARM_64 is missing. 16777228 is 0x0100000C,
// that is, the CPU_TYPE_ARM value (12) with bit 0x01000000 set.
// NOTE(review): presumably that bit is the 64-bit ABI flag -- confirm
// against <mach/machine.h>.
#if !defined(CPU_TYPE_ARM_64)
#define CPU_TYPE_ARM_64 16777228
#endif
51 
52 namespace google_breakpad {
53 namespace mach_o {
54 
// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
// arguments, so you can't place expressions that do necessary work in
// the argument of an assert. Nor can you assign the result of the
// expression to a variable and assert that the variable's value is
// true: you'll get unused variable warnings when NDEBUG is #defined.
//
// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that
// the result is true if NDEBUG is not #defined.
//
// In both configurations the macro expands to an expression of type void:
// the NDEBUG form discards the value explicitly, so using the macro as a
// statement does not provoke unused-value warnings, and code that tried to
// use the macro's value would fail to compile in either build.
#if defined(NDEBUG)
#define ASSERT_ALWAYS_EVAL(x) ((void)(x))
#else
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#endif
68 
BadHeader()69 void FatReader::Reporter::BadHeader() {
70   fprintf(stderr, "%s: file is neither a fat binary file"
71           " nor a Mach-O object file\n", filename_.c_str());
72 }
73 
TooShort()74 void FatReader::Reporter::TooShort() {
75   fprintf(stderr, "%s: file too short for the data it claims to contain\n",
76           filename_.c_str());
77 }
78 
MisplacedObjectFile()79 void FatReader::Reporter::MisplacedObjectFile() {
80   fprintf(stderr, "%s: file too short for the object files it claims"
81           " to contain\n", filename_.c_str());
82 }
83 
Read(const uint8_t * buffer,size_t size)84 bool FatReader::Read(const uint8_t *buffer, size_t size) {
85   buffer_.start = buffer;
86   buffer_.end = buffer + size;
87   ByteCursor cursor(&buffer_);
88 
89   // Fat binaries always use big-endian, so read the magic number in
90   // that endianness. To recognize Mach-O magic numbers, which can use
91   // either endianness, check for both the proper and reversed forms
92   // of the magic numbers.
93   cursor.set_big_endian(true);
94   if (cursor >> magic_) {
95     if (magic_ == FAT_MAGIC) {
96       // How many object files does this fat binary contain?
97       uint32_t object_files_count;
98       if (!(cursor >> object_files_count)) {  // nfat_arch
99         reporter_->TooShort();
100         return false;
101       }
102 
103       // Read the list of object files.
104       object_files_.resize(object_files_count);
105       for (size_t i = 0; i < object_files_count; i++) {
106         struct fat_arch objfile;
107 
108         // Read this object file entry, byte-swapping as appropriate.
109         cursor >> objfile.cputype
110                >> objfile.cpusubtype
111                >> objfile.offset
112                >> objfile.size
113                >> objfile.align;
114 
115         SuperFatArch super_fat_arch(objfile);
116         object_files_[i] = super_fat_arch;
117 
118         if (!cursor) {
119           reporter_->TooShort();
120           return false;
121         }
122         // Does the file actually have the bytes this entry refers to?
123         size_t fat_size = buffer_.Size();
124         if (objfile.offset > fat_size ||
125             objfile.size > fat_size - objfile.offset) {
126           reporter_->MisplacedObjectFile();
127           return false;
128         }
129       }
130 
131       return true;
132     } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
133                magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
134       // If this is a little-endian Mach-O file, fix the cursor's endianness.
135       if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
136         cursor.set_big_endian(false);
137       // Record the entire file as a single entry in the object file list.
138       object_files_.resize(1);
139 
140       // Get the cpu type and subtype from the Mach-O header.
141       if (!(cursor >> object_files_[0].cputype
142                    >> object_files_[0].cpusubtype)) {
143         reporter_->TooShort();
144         return false;
145       }
146 
147       object_files_[0].offset = 0;
148       object_files_[0].size = static_cast<uint64_t>(buffer_.Size());
149       // This alignment is correct for 32 and 64-bit x86 and ppc.
150       // See get_align in the lipo source for other architectures:
151       // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
152       object_files_[0].align = 12;  // 2^12 == 4096
153       return true;
154     }
155   }
156   reporter_->BadHeader();
157   return false;
158 }
159 
BadHeader()160 void Reader::Reporter::BadHeader() {
161   fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
162 }
163 
CPUTypeMismatch(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)164 void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
165                                        cpu_subtype_t cpu_subtype,
166                                        cpu_type_t expected_cpu_type,
167                                        cpu_subtype_t expected_cpu_subtype) {
168   fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
169           " type %d, subtype %d\n",
170           filename_.c_str(), cpu_type, cpu_subtype,
171           expected_cpu_type, expected_cpu_subtype);
172 }
173 
HeaderTruncated()174 void Reader::Reporter::HeaderTruncated() {
175   fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
176           filename_.c_str());
177 }
178 
LoadCommandRegionTruncated()179 void Reader::Reporter::LoadCommandRegionTruncated() {
180   fprintf(stderr, "%s: file too short to hold load command region"
181           " given in Mach-O header\n", filename_.c_str());
182 }
183 
LoadCommandsOverrun(size_t claimed,size_t i,LoadCommandType type)184 void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
185                                            LoadCommandType type) {
186   fprintf(stderr, "%s: file's header claims there are %zu"
187           " load commands, but load command #%zu",
188           filename_.c_str(), claimed, i);
189   if (type) fprintf(stderr, ", of type %d,", type);
190   fprintf(stderr, " extends beyond the end of the load command region\n");
191 }
192 
LoadCommandTooShort(size_t i,LoadCommandType type)193 void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
194   fprintf(stderr, "%s: the contents of load command #%zu, of type %d,"
195           " extend beyond the size given in the load command's header\n",
196           filename_.c_str(), i, type);
197 }
198 
SectionsMissing(const string & name)199 void Reader::Reporter::SectionsMissing(const string &name) {
200   fprintf(stderr, "%s: the load command for segment '%s'"
201           " is too short to hold the section headers it claims to have\n",
202           filename_.c_str(), name.c_str());
203 }
204 
MisplacedSegmentData(const string & name)205 void Reader::Reporter::MisplacedSegmentData(const string &name) {
206   fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
207           " the end of the file\n", filename_.c_str(), name.c_str());
208 }
209 
MisplacedSectionData(const string & section,const string & segment)210 void Reader::Reporter::MisplacedSectionData(const string &section,
211                                             const string &segment) {
212   fprintf(stderr, "%s: the section '%s' in segment '%s'"
213           " claims its contents lie outside the segment's contents\n",
214           filename_.c_str(), section.c_str(), segment.c_str());
215 }
216 
MisplacedSymbolTable()217 void Reader::Reporter::MisplacedSymbolTable() {
218   fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
219           " table's contents are located beyond the end of the file\n",
220           filename_.c_str());
221 }
222 
UnsupportedCPUType(cpu_type_t cpu_type)223 void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
224   fprintf(stderr, "%s: CPU type %d is not supported\n",
225           filename_.c_str(), cpu_type);
226 }
227 
Read(const uint8_t * buffer,size_t size,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)228 bool Reader::Read(const uint8_t *buffer,
229                   size_t size,
230                   cpu_type_t expected_cpu_type,
231                   cpu_subtype_t expected_cpu_subtype) {
232   assert(!buffer_.start);
233   buffer_.start = buffer;
234   buffer_.end = buffer + size;
235   ByteCursor cursor(&buffer_, true);
236   uint32_t magic;
237   if (!(cursor >> magic)) {
238     reporter_->HeaderTruncated();
239     return false;
240   }
241 
242   if (expected_cpu_type != CPU_TYPE_ANY) {
243     uint32_t expected_magic;
244     // validate that magic matches the expected cpu type
245     switch (expected_cpu_type) {
246       case CPU_TYPE_ARM:
247       case CPU_TYPE_I386:
248         expected_magic = MH_CIGAM;
249         break;
250       case CPU_TYPE_POWERPC:
251         expected_magic = MH_MAGIC;
252         break;
253       case CPU_TYPE_ARM_64:
254       case CPU_TYPE_X86_64:
255         expected_magic = MH_CIGAM_64;
256         break;
257       case CPU_TYPE_POWERPC64:
258         expected_magic = MH_MAGIC_64;
259         break;
260       default:
261         reporter_->UnsupportedCPUType(expected_cpu_type);
262         return false;
263     }
264 
265     if (expected_magic != magic) {
266       reporter_->BadHeader();
267       return false;
268     }
269   }
270 
271   // Since the byte cursor is in big-endian mode, a reversed magic number
272   // always indicates a little-endian file, regardless of our own endianness.
273   switch (magic) {
274     case MH_MAGIC:    big_endian_ = true;  bits_64_ = false; break;
275     case MH_CIGAM:    big_endian_ = false; bits_64_ = false; break;
276     case MH_MAGIC_64: big_endian_ = true;  bits_64_ = true;  break;
277     case MH_CIGAM_64: big_endian_ = false; bits_64_ = true;  break;
278     default:
279       reporter_->BadHeader();
280       return false;
281   }
282   cursor.set_big_endian(big_endian_);
283   uint32_t commands_size, reserved;
284   cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
285          >> commands_size >> flags_;
286   if (bits_64_)
287     cursor >> reserved;
288   if (!cursor) {
289     reporter_->HeaderTruncated();
290     return false;
291   }
292 
293   if (expected_cpu_type != CPU_TYPE_ANY &&
294       (expected_cpu_type != cpu_type_ ||
295        expected_cpu_subtype != cpu_subtype_)) {
296     reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
297                               expected_cpu_type, expected_cpu_subtype);
298     return false;
299   }
300 
301   cursor
302       .PointTo(&load_commands_.start, commands_size)
303       .PointTo(&load_commands_.end, 0);
304   if (!cursor) {
305     reporter_->LoadCommandRegionTruncated();
306     return false;
307   }
308 
309   return true;
310 }
311 
WalkLoadCommands(Reader::LoadCommandHandler * handler) const312 bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const {
313   ByteCursor list_cursor(&load_commands_, big_endian_);
314 
315   for (size_t index = 0; index < load_command_count_; ++index) {
316     // command refers to this load command alone, so that cursor will
317     // refuse to read past the load command's end. But since we haven't
318     // read the size yet, let command initially refer to the entire
319     // remainder of the load command series.
320     ByteBuffer command(list_cursor.here(), list_cursor.Available());
321     ByteCursor cursor(&command, big_endian_);
322 
323     // Read the command type and size --- fields common to all commands.
324     uint32_t type, size;
325     if (!(cursor >> type)) {
326       reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
327       return false;
328     }
329     if (!(cursor >> size) || size > command.Size()) {
330       reporter_->LoadCommandsOverrun(load_command_count_, index, type);
331       return false;
332     }
333 
334     // Now that we've read the length, restrict command's range to this
335     // load command only.
336     command.end = command.start + size;
337 
338     switch (type) {
339       case LC_SEGMENT:
340       case LC_SEGMENT_64: {
341         Segment segment;
342         segment.bits_64 = (type == LC_SEGMENT_64);
343         size_t word_size = segment.bits_64 ? 8 : 4;
344         cursor.CString(&segment.name, 16);
345         cursor
346             .Read(word_size, false, &segment.vmaddr)
347             .Read(word_size, false, &segment.vmsize)
348             .Read(word_size, false, &segment.fileoff)
349             .Read(word_size, false, &segment.filesize);
350         cursor >> segment.maxprot
351                >> segment.initprot
352                >> segment.nsects
353                >> segment.flags;
354         if (!cursor) {
355           reporter_->LoadCommandTooShort(index, type);
356           return false;
357         }
358         if (segment.fileoff > buffer_.Size() ||
359             segment.filesize > buffer_.Size() - segment.fileoff) {
360           reporter_->MisplacedSegmentData(segment.name);
361           return false;
362         }
363         // Mach-O files in .dSYM bundles have the contents of the loaded
364         // segments removed, and their file offsets and file sizes zeroed
365         // out. To help us handle this special case properly, give such
366         // segments' contents NULL starting and ending pointers.
367         if (segment.fileoff == 0 && segment.filesize == 0) {
368           segment.contents.start = segment.contents.end = NULL;
369         } else {
370           segment.contents.start = buffer_.start + segment.fileoff;
371           segment.contents.end = segment.contents.start + segment.filesize;
372         }
373         // The section list occupies the remainder of this load command's space.
374         segment.section_list.start = cursor.here();
375         segment.section_list.end = command.end;
376 
377         if (!handler->SegmentCommand(segment))
378           return false;
379         break;
380       }
381 
382       case LC_SYMTAB: {
383         uint32_t symoff, nsyms, stroff, strsize;
384         cursor >> symoff >> nsyms >> stroff >> strsize;
385         if (!cursor) {
386           reporter_->LoadCommandTooShort(index, type);
387           return false;
388         }
389         // How big are the entries in the symbol table?
390         // sizeof(struct nlist_64) : sizeof(struct nlist),
391         // but be paranoid about alignment vs. target architecture.
392         size_t symbol_size = bits_64_ ? 16 : 12;
393         // How big is the entire symbol array?
394         size_t symbols_size = nsyms * symbol_size;
395         if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
396             stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
397           reporter_->MisplacedSymbolTable();
398           return false;
399         }
400         ByteBuffer entries(buffer_.start + symoff, symbols_size);
401         ByteBuffer names(buffer_.start + stroff, strsize);
402         if (!handler->SymtabCommand(entries, names))
403           return false;
404         break;
405       }
406 
407       default: {
408         if (!handler->UnknownCommand(type, command))
409           return false;
410         break;
411       }
412     }
413 
414     list_cursor.set_here(command.end);
415   }
416 
417   return true;
418 }
419 
420 // A load command handler that looks for a segment of a given name.
421 class Reader::SegmentFinder : public LoadCommandHandler {
422  public:
423   // Create a load command handler that looks for a segment named NAME,
424   // and sets SEGMENT to describe it if found.
SegmentFinder(const string & name,Segment * segment)425   SegmentFinder(const string &name, Segment *segment)
426       : name_(name), segment_(segment), found_() { }
427 
428   // Return true if the traversal found the segment, false otherwise.
found() const429   bool found() const { return found_; }
430 
SegmentCommand(const Segment & segment)431   bool SegmentCommand(const Segment &segment) {
432     if (segment.name == name_) {
433       *segment_ = segment;
434       found_ = true;
435       return false;
436     }
437     return true;
438   }
439 
440  private:
441   // The name of the segment our creator is looking for.
442   const string &name_;
443 
444   // Where we should store the segment if found. (WEAK)
445   Segment *segment_;
446 
447   // True if we found the segment.
448   bool found_;
449 };
450 
FindSegment(const string & name,Segment * segment) const451 bool Reader::FindSegment(const string &name, Segment *segment) const {
452   SegmentFinder finder(name, segment);
453   WalkLoadCommands(&finder);
454   return finder.found();
455 }
456 
WalkSegmentSections(const Segment & segment,SectionHandler * handler) const457 bool Reader::WalkSegmentSections(const Segment &segment,
458                                  SectionHandler *handler) const {
459   size_t word_size = segment.bits_64 ? 8 : 4;
460   ByteCursor cursor(&segment.section_list, big_endian_);
461 
462   for (size_t i = 0; i < segment.nsects; i++) {
463     Section section;
464     section.bits_64 = segment.bits_64;
465     uint64_t size, offset;
466     uint32_t dummy32;
467     cursor
468         .CString(&section.section_name, 16)
469         .CString(&section.segment_name, 16)
470         .Read(word_size, false, &section.address)
471         .Read(word_size, false, &size)
472         .Read(sizeof(uint32_t), false, &offset)  // clears high bits of |offset|
473         >> section.align
474         >> dummy32
475         >> dummy32
476         >> section.flags
477         >> dummy32
478         >> dummy32;
479     if (section.bits_64)
480       cursor >> dummy32;
481     if (!cursor) {
482       reporter_->SectionsMissing(segment.name);
483       return false;
484     }
485 
486     // Even 64-bit Mach-O isn’t a true 64-bit format in that it doesn’t handle
487     // 64-bit file offsets gracefully. Segment load commands do contain 64-bit
488     // file offsets, but sections within do not. Because segments load
489     // contiguously, recompute each section’s file offset on the basis of its
490     // containing segment’s file offset and the difference between the section’s
491     // and segment’s load addresses. If truncation is detected, honor the
492     // recomputed offset.
493     if (segment.bits_64 &&
494         segment.fileoff + segment.filesize >
495             std::numeric_limits<uint32_t>::max()) {
496       const uint64_t section_offset_recomputed =
497           segment.fileoff + section.address - segment.vmaddr;
498       if (offset == static_cast<uint32_t>(section_offset_recomputed)) {
499         offset = section_offset_recomputed;
500       }
501     }
502 
503     const uint32_t section_type = section.flags & SECTION_TYPE;
504     if (section_type == S_ZEROFILL || section_type == S_THREAD_LOCAL_ZEROFILL ||
505             section_type == S_GB_ZEROFILL) {
506       // Zero-fill sections have a size, but no contents.
507       section.contents.start = section.contents.end = NULL;
508     } else if (segment.contents.start == NULL &&
509                segment.contents.end == NULL) {
510       // Mach-O files in .dSYM bundles have the contents of the loaded
511       // segments removed, and their file offsets and file sizes zeroed
512       // out.  However, the sections within those segments still have
513       // non-zero sizes.  There's no reason to call MisplacedSectionData in
514       // this case; the caller may just need the section's load
515       // address. But do set the contents' limits to NULL, for safety.
516       section.contents.start = section.contents.end = NULL;
517     } else {
518       if (offset < size_t(segment.contents.start - buffer_.start) ||
519           offset > size_t(segment.contents.end - buffer_.start) ||
520           size > size_t(segment.contents.end - buffer_.start - offset)) {
521         reporter_->MisplacedSectionData(section.section_name,
522                                         section.segment_name);
523         return false;
524       }
525       section.contents.start = buffer_.start + offset;
526       section.contents.end = section.contents.start + size;
527     }
528     if (!handler->HandleSection(section))
529       return false;
530   }
531   return true;
532 }
533 
534 // A SectionHandler that builds a SectionMap for the sections within a
535 // given segment.
536 class Reader::SectionMapper: public SectionHandler {
537  public:
538   // Create a SectionHandler that populates MAP with an entry for
539   // each section it is given.
SectionMapper(SectionMap * map)540   SectionMapper(SectionMap *map) : map_(map) { }
HandleSection(const Section & section)541   bool HandleSection(const Section &section) {
542     (*map_)[section.section_name] = section;
543     return true;
544   }
545  private:
546   // The map under construction. (WEAK)
547   SectionMap *map_;
548 };
549 
MapSegmentSections(const Segment & segment,SectionMap * section_map) const550 bool Reader::MapSegmentSections(const Segment &segment,
551                                 SectionMap *section_map) const {
552   section_map->clear();
553   SectionMapper mapper(section_map);
554   return WalkSegmentSections(segment, &mapper);
555 }
556 
557 }  // namespace mach_o
558 }  // namespace google_breakpad
559