1 // Copyright (c) 2007, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 //  dynamic_images.h
31 //
32 //    Implements most of the function of the dyld API, but allowing an
33 //    arbitrary task to be introspected, unlike the dyld API which
34 //    only allows operation on the current task.  The current implementation
35 //    is limited to use by 32-bit tasks.
36 
37 #ifndef CLIENT_MAC_HANDLER_DYNAMIC_IMAGES_H__
38 #define CLIENT_MAC_HANDLER_DYNAMIC_IMAGES_H__
39 
40 #include <mach/mach.h>
41 #include <mach-o/dyld.h>
42 #include <mach-o/loader.h>
43 #include <sys/types.h>
44 
45 #include <string>
46 #include <vector>
47 
48 #include "mach_vm_compat.h"
49 
50 namespace google_breakpad {
51 
52 using std::string;
53 using std::vector;
54 
55 //==============================================================================
// Mirror of the dyld_image_info struct (see "dyld_gdb.h" in the darwin
// source) as laid out in a 32-bit task.  Pointer-typed members are held as
// fixed-width integers so the layout does not depend on how this code itself
// is compiled.
struct dyld_image_info32 {
  uint32_t load_address_;   // struct mach_header*
  uint32_t file_path_;      // char*
  uint32_t file_mod_date_;
};
63 
// 64-bit counterpart of dyld_image_info32: the same three members, each
// widened to 64 bits.
struct dyld_image_info64 {
  uint64_t load_address_;   // struct mach_header*
  uint64_t file_path_;      // char*
  uint64_t file_mod_date_;
};
69 
70 //==============================================================================
// 32-bit layout of the structure that "dyld_gdb.h" in the darwin source
// defines for _dyld_all_image_infos (a variable in dyld).  It is consulted to
// determine which dynamic code has been loaded.
struct dyld_all_image_infos32 {
  uint32_t version;         // == 1 in Mac OS X 10.4
  uint32_t infoArrayCount;
  uint32_t infoArray;       // const struct dyld_image_info*
  uint32_t notification;
  bool     processDetachedFromSharedRegion;
  // NOTE(review): presumably stands in for the dyld fields that sit between
  // the flag above and sharedCacheSlide -- confirm against dyld's headers.
  uint32_t padding[15];
  // Only in version 12 (Mac OS X 10.7, iOS 4.3) and later
  uint32_t sharedCacheSlide;
};
84 
// 64-bit layout of dyld's all-image-infos structure.  |version| and
// |infoArrayCount| stay 32 bits wide; the pointer-sized members are 64 bits.
struct dyld_all_image_infos64 {
  uint32_t version;         // == 1 in Mac OS X 10.4
  uint32_t infoArrayCount;
  uint64_t infoArray;       // const struct dyld_image_info*
  uint64_t notification;
  bool     processDetachedFromSharedRegion;
  // NOTE(review): presumably stands in for the dyld fields that sit between
  // the flag above and sharedCacheSlide -- confirm against dyld's headers.
  uint64_t padding[15];
  // Only in version 12 (Mac OS X 10.7, iOS 4.3) and later
  uint64_t sharedCacheSlide;
};
95 
96 // some typedefs to isolate 64/32 bit differences
97 #ifdef __LP64__
98 typedef mach_header_64 breakpad_mach_header;
99 typedef segment_command_64 breakpad_mach_segment_command;
100 #else
101 typedef mach_header breakpad_mach_header;
102 typedef segment_command breakpad_mach_segment_command;
103 #endif
104 
// Bit in mach_header.flags that indicates whether or not the image is in the
// dyld shared cache. The dyld shared cache is a single image into which
// commonly used system dylibs and frameworks are incorporated. dyld maps it
// into every process at load time. The component images all have the same
// slide.
//
// The name uses the same MH_ prefix as the flag constants in the system
// Mach-O headers, so only define it here when no other header has already
// done so; this avoids a macro redefinition.
#ifndef MH_SHAREDCACHE
#define MH_SHAREDCACHE 0x80000000
#endif
111 
// Free function templates that deal with 32-bit/64-bit Mach-O differences.
// Only the declarations live here.  DynamicImage is forward declared because
// both helpers take it by reference ahead of the class definition.
// NOTE(review): MachBits presumably selects the 32- or 64-bit Mach-O
// structure set -- confirm against the definitions.
class DynamicImage;

template <class MachBits>
bool FindTextSection(DynamicImage& image);

template <class MachBits>
uint32_t GetFileTypeFromHeader(DynamicImage& image);
119 
120 //==============================================================================
121 // Represents a single dynamically loaded mach-o image
122 class DynamicImage {
123  public:
DynamicImage(uint8_t * header,size_t header_size,uint64_t load_address,string file_path,uintptr_t image_mod_date,mach_port_t task,cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,ptrdiff_t shared_cache_slide)124   DynamicImage(uint8_t *header,     // data is copied
125                size_t header_size,  // includes load commands
126                uint64_t load_address,
127                string file_path,
128                uintptr_t image_mod_date,
129                mach_port_t task,
130                cpu_type_t cpu_type,
131                cpu_subtype_t cpu_subtype,
132                ptrdiff_t shared_cache_slide)
133     : header_(header, header + header_size),
134       header_size_(header_size),
135       load_address_(load_address),
136       vmaddr_(0),
137       vmsize_(0),
138       slide_(0),
139       crash_info_(),
140       version_(0),
141       file_path_(file_path),
142       file_mod_date_(image_mod_date),
143       task_(task),
144       cpu_type_(cpu_type),
145       cpu_subtype_(cpu_subtype),
146       shared_cache_slide_(shared_cache_slide) {
147     CalculateMemoryAndVersionInfo();
148   }
149 
150   // Size of mach_header plus load commands
GetHeaderSize()151   size_t GetHeaderSize() const {return header_.size();}
152 
153   // Full path to mach-o binary
GetFilePath()154   string GetFilePath() {return file_path_;}
155 
GetModDate()156   uint64_t GetModDate() const {return file_mod_date_;}
157 
158   // Actual address where the image was loaded
GetLoadAddress()159   uint64_t GetLoadAddress() const {return load_address_;}
160 
161   // Address where the image should be loaded
GetVMAddr()162   mach_vm_address_t GetVMAddr() const {return vmaddr_;}
163 
GetInDyldSharedCache()164   bool GetInDyldSharedCache()
165     {return (shared_cache_slide_ && (slide_ == shared_cache_slide_));}
166 
167   // Difference between GetLoadAddress() and GetVMAddr()
GetVMAddrSlide()168   ptrdiff_t GetVMAddrSlide() const {return slide_;}
169 
170   // Size of the image
GetVMSize()171   mach_vm_size_t GetVMSize() const {return vmsize_;}
172 
173   // Returns the address of the locally cached __DATA,__crash_info section.
174   // The vector will be empty if the image doesn't have a __crash_info
175   // section. But even if the vector isn't empty, its contents may be "empty"
176   // of useful data (see definition of crashreporter_annotations_t in
177   // mach_vm_compat.h).
GetCrashInfo()178   mach_vm_address_t GetCrashInfo() const {
179     return reinterpret_cast<mach_vm_address_t>(&crash_info_[0]);
180   }
181 
182   // Size of the locally cached __DATA,__crash_info section. This will be zero
183   // if the vector is empty. But even if it's non-zero, the __crash_info
184   // section of which it's a copy may be empty of useful data.
GetCrashInfoSize()185   size_t GetCrashInfoSize() const {return crash_info_.size();}
186 
187   // Task owning this loaded image
GetTask()188   mach_port_t GetTask() {return task_;}
189 
190   // CPU type of the task and the image
GetCPUType()191   cpu_type_t GetCPUType() {return cpu_type_;}
192 
193   // CPU subtype of the image
GetCPUSubtype()194   cpu_type_t GetCPUSubtype() {return cpu_subtype_;}
195 
196   // filetype from the Mach-O header.
197   uint32_t GetFileType();
198 
199   // Return true if the task is a 64-bit architecture.
Is64Bit()200   bool Is64Bit() { return (GetCPUType() & CPU_ARCH_ABI64) == CPU_ARCH_ABI64; }
201 
GetVersion()202   uint32_t GetVersion() {return version_;}
203   // For sorting
204   bool operator<(const DynamicImage &inInfo) {
205     return GetLoadAddress() < inInfo.GetLoadAddress();
206   }
207 
208   // Sanity checking
IsValid()209   bool IsValid() {return GetVMSize() != 0;}
210 
211  private:
212   DynamicImage(const DynamicImage &);
213   DynamicImage &operator=(const DynamicImage &);
214 
215   friend class DynamicImages;
216   template<typename MachBits>
217   friend bool FindTextSection(DynamicImage& image);
218   template<typename MachBits>
219   friend uint32_t GetFileTypeFromHeader(DynamicImage& image);
220 
221   // Initializes vmaddr_, vmsize_, and slide_
222   void CalculateMemoryAndVersionInfo();
223 
224   const vector<uint8_t>   header_;        // our local copy of the header
225   size_t                  header_size_;    // mach_header plus load commands
226   uint64_t                load_address_;   // base address image is mapped into
227   mach_vm_address_t       vmaddr_;
228   mach_vm_size_t          vmsize_;
229   ptrdiff_t               slide_;
230   vector<uint8_t>         crash_info_;
231   uint32_t                version_;        // Dylib version
232   string                  file_path_;     // path dyld used to load the image
233   uintptr_t               file_mod_date_;  // time_t of image file
234 
235   mach_port_t             task_;
236   cpu_type_t              cpu_type_;        // CPU type of task_ and image
237   cpu_subtype_t           cpu_subtype_;     // CPU subtype of image
238   ptrdiff_t               shared_cache_slide_; // Task's shared cache slide
239 };
240 
241 //==============================================================================
242 // DynamicImageRef is just a simple wrapper for a pointer to
243 // DynamicImage.  The reason we use it instead of a simple typedef is so
244 // that we can use stl::sort() on a vector of DynamicImageRefs
245 // and simple class pointers can't implement operator<().
246 //
247 class DynamicImageRef {
248  public:
DynamicImageRef(DynamicImage * inP)249   explicit DynamicImageRef(DynamicImage *inP) : p(inP) {}
250   // The copy constructor is required by STL
DynamicImageRef(const DynamicImageRef & inRef)251   DynamicImageRef(const DynamicImageRef &inRef) : p(inRef.p) {}
252 
253   bool operator<(const DynamicImageRef &inRef) const {
254     return (*const_cast<DynamicImageRef*>(this)->p)
255       < (*const_cast<DynamicImageRef&>(inRef).p);
256   }
257 
258   bool operator==(const DynamicImageRef &inInfo) const {
259     return (*const_cast<DynamicImageRef*>(this)->p).GetLoadAddress() ==
260         (*const_cast<DynamicImageRef&>(inInfo)).GetLoadAddress();
261   }
262 
263   // Be just like DynamicImage*
264   DynamicImage  *operator->() {return p;}
265   operator DynamicImage*() {return p;}
266 
267  private:
268   DynamicImage  *p;
269 };
270 
// Free function template that deals with 32-bit/64-bit Mach-O differences
// when reading image information.  DynamicImages is forward declared because
// the helper takes it by reference ahead of the class definition.
// NOTE(review): MachBits presumably selects the 32- or 64-bit structure set
// -- confirm against the definition.
class DynamicImages;

template <class MachBits>
void ReadImageInfo(DynamicImages& images, uint64_t image_list_address);
275 
276 //==============================================================================
277 // An object of type DynamicImages may be created to allow introspection of
278 // an arbitrary task's dynamically loaded mach-o binaries.  This makes the
279 // assumption that the current task has send rights to the target task.
280 class DynamicImages {
281  public:
282   explicit DynamicImages(mach_port_t task);
283 
~DynamicImages()284   ~DynamicImages() {
285     for (int i = 0; i < GetImageCount(); ++i) {
286       delete image_list_[i];
287     }
288   }
289 
290   // Returns the number of dynamically loaded mach-o images.
GetImageCount()291   int GetImageCount() const {return static_cast<int>(image_list_.size());}
292 
293   // Returns an individual image.
GetImage(int i)294   DynamicImage *GetImage(int i) {
295     if (i < (int)image_list_.size()) {
296       return image_list_[i];
297     }
298     return NULL;
299   }
300 
301   // Returns the image corresponding to the main executable.
302   DynamicImage *GetExecutableImage();
303   int GetExecutableImageIndex();
304 
305   // Returns the task which we're looking at.
GetTask()306   mach_port_t GetTask() const {return task_;}
307 
308   // CPU type of the task
GetCPUType()309   cpu_type_t GetCPUType() {return cpu_type_;}
310 
311   // Return true if the task is a 64-bit architecture.
Is64Bit()312   bool Is64Bit() { return (GetCPUType() & CPU_ARCH_ABI64) == CPU_ARCH_ABI64; }
313 
314   // Determine the CPU type of the task being dumped.
315   static cpu_type_t DetermineTaskCPUType(task_t task);
316 
317   // Get the native CPU type of this task.
GetNativeCPUType()318   static cpu_type_t GetNativeCPUType() {
319 #if defined(__i386__)
320     return CPU_TYPE_I386;
321 #elif defined(__x86_64__)
322     return CPU_TYPE_X86_64;
323 #elif defined(__ppc__)
324     return CPU_TYPE_POWERPC;
325 #elif defined(__ppc64__)
326     return CPU_TYPE_POWERPC64;
327 #elif defined(__arm__)
328     return CPU_TYPE_ARM;
329 #elif defined(__aarch64__)
330     return CPU_TYPE_ARM64;
331 #else
332 #error "GetNativeCPUType not implemented for this architecture"
333 #endif
334   }
335 
336  private:
337   template<typename MachBits>
338   friend void ReadImageInfo(DynamicImages& images, uint64_t image_list_address);
339 
IsOurTask()340   bool IsOurTask() {return task_ == mach_task_self();}
341 
342   // Initialization
343   void ReadImageInfoForTask();
344   uint64_t GetDyldAllImageInfosPointer();
345 
346   mach_port_t              task_;
347   cpu_type_t               cpu_type_;  // CPU type of task_
348   vector<DynamicImageRef>  image_list_;
349 };
350 
351 // Fill bytes with the contents of memory at a particular
352 // location in another task.
353 kern_return_t ReadTaskMemory(task_port_t target_task,
354                              const uint64_t address,
355                              size_t length,
356                              vector<uint8_t> &bytes);
357 
358 }   // namespace google_breakpad
359 
360 #endif // CLIENT_MAC_HANDLER_DYNAMIC_IMAGES_H__
361