1 // Copyright (c) 2007, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 extern "C" { // needed to compile on Leopard
31   #include <mach-o/nlist.h>
32   #include <stdlib.h>
33   #include <stdio.h>
34 }
35 
36 #include "breakpad_nlist_64.h"
37 #include <dlfcn.h>
38 #include <mach/mach_vm.h>
39 #include <algorithm>
40 #include "client/mac/handler/dynamic_images.h"
41 
42 namespace google_breakpad {
43 
44 //==============================================================================
45 // Returns the size of the memory region containing |address| and the
46 // number of bytes from |address| to the end of the region.
47 // We potentially, will extend the size of the original
48 // region by the size of the following region if it's contiguous with the
49 // first in order to handle cases when we're reading strings and they
50 // straddle two vm regions.
51 //
GetMemoryRegionSize(task_port_t target_task,const void * address,mach_vm_size_t * size_to_end)52 static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task,
53                                           const void* address,
54                                           mach_vm_size_t *size_to_end) {
55   mach_vm_address_t region_base = (mach_vm_address_t)address;
56   mach_vm_size_t region_size;
57   natural_t nesting_level = 0;
58   vm_region_submap_info_64 submap_info;
59   mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64;
60 
61   // Get information about the vm region containing |address|
62   vm_region_recurse_info_t region_info;
63   region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info);
64 
65   kern_return_t result =
66     mach_vm_region_recurse(target_task,
67                            &region_base,
68                            &region_size,
69                            &nesting_level,
70                            region_info,
71                            &info_count);
72 
73   if (result == KERN_SUCCESS) {
74     // Get distance from |address| to the end of this region
75     *size_to_end = region_base + region_size -(mach_vm_address_t)address;
76 
77     // If we want to handle strings as long as 4096 characters we may need
78     // to check if there's a vm region immediately following the first one.
79     // If so, we need to extend |*size_to_end| to go all the way to the end
80     // of the second region.
81     if (*size_to_end < 4096) {
82       // Second region starts where the first one ends
83       mach_vm_address_t region_base2 =
84         (mach_vm_address_t)(region_base + region_size);
85       mach_vm_size_t region_size2;
86 
87       // Get information about the following vm region
88       result =
89         mach_vm_region_recurse(target_task,
90                                &region_base2,
91                                &region_size2,
92                                &nesting_level,
93                                region_info,
94                                &info_count);
95 
96       // Extend region_size to go all the way to the end of the 2nd region
97       if (result == KERN_SUCCESS
98           && region_base2 == region_base + region_size) {
99         region_size += region_size2;
100       }
101     }
102 
103     *size_to_end = region_base + region_size -(mach_vm_address_t)address;
104   } else {
105     region_size = 0;
106     *size_to_end = 0;
107   }
108 
109   return region_size;
110 }
111 
112 #define kMaxStringLength 8192
113 //==============================================================================
114 // Reads a NULL-terminated string from another task.
115 //
116 // Warning!  This will not read any strings longer than kMaxStringLength-1
117 //
ReadTaskString(task_port_t target_task,const void * address)118 static void* ReadTaskString(task_port_t target_task,
119                             const void* address) {
120   // The problem is we don't know how much to read until we know how long
121   // the string is. And we don't know how long the string is, until we've read
122   // the memory!  So, we'll try to read kMaxStringLength bytes
123   // (or as many bytes as we can until we reach the end of the vm region).
124   mach_vm_size_t size_to_end;
125   GetMemoryRegionSize(target_task, address, &size_to_end);
126 
127   if (size_to_end > 0) {
128     mach_vm_size_t size_to_read =
129       size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end;
130 
131     kern_return_t kr;
132     return ReadTaskMemory(target_task, address, size_to_read, &kr);
133   }
134 
135   return NULL;
136 }
137 
138 //==============================================================================
139 // Reads an address range from another task.  A block of memory is malloced
140 // and should be freed by the caller.
ReadTaskMemory(task_port_t target_task,const void * address,size_t length,kern_return_t * kr)141 void* ReadTaskMemory(task_port_t target_task,
142                      const void* address,
143                      size_t length,
144                      kern_return_t *kr) {
145   void* result = NULL;
146   int systemPageSize = getpagesize();
147 
148   // use the negative of the page size for the mask to find the page address
149   mach_vm_address_t page_address =
150       reinterpret_cast<mach_vm_address_t>(address) & (-systemPageSize);
151 
152   mach_vm_address_t last_page_address =
153       (reinterpret_cast<mach_vm_address_t>(address) + length +
154        (systemPageSize - 1)) & (-systemPageSize);
155 
156   mach_vm_size_t page_size = last_page_address - page_address;
157   uint8_t* local_start;
158   uint32_t local_length;
159 
160   kern_return_t r;
161 
162   r = mach_vm_read(target_task,
163                    page_address,
164                    page_size,
165                    reinterpret_cast<vm_offset_t*>(&local_start),
166                    &local_length);
167 
168 
169   if (kr != NULL) {
170     *kr = r;
171   }
172 
173   if (r == KERN_SUCCESS) {
174     result = malloc(length);
175     if (result != NULL) {
176       memcpy(result,
177              &local_start[(mach_vm_address_t)address - page_address],
178              length);
179     }
180     mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length);
181   }
182 
183   return result;
184 }
185 
186 #pragma mark -
187 
188 //==============================================================================
189 // Initializes vmaddr_, vmsize_, and slide_
CalculateMemoryAndVersionInfo()190 void DynamicImage::CalculateMemoryAndVersionInfo() {
191   breakpad_mach_header *header = GetMachHeader();
192 
193   // unless we can process the header, ensure that calls to
194   // IsValid() will return false
195   vmaddr_ = 0;
196   vmsize_ = 0;
197   slide_ = 0;
198   version_ = 0;
199 
200   bool foundTextSection = false;
201   bool foundDylibIDCommand = false;
202 
203 #if __LP64__
204   if(header->magic != MH_MAGIC_64) {
205     return;
206   }
207 #else
208   if(header->magic != MH_MAGIC) {
209     return;
210   }
211 #endif
212 
213 #ifdef __LP64__
214   const uint32_t segmentLoadCommand = LC_SEGMENT_64;
215 #else
216   const uint32_t segmentLoadCommand = LC_SEGMENT;
217 #endif
218 
219   const struct load_command *cmd =
220     reinterpret_cast<const struct load_command *>(header + 1);
221 
222   for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) {
223     if (!foundTextSection) {
224       if (cmd->cmd == segmentLoadCommand) {
225         const breakpad_mach_segment_command *seg =
226             reinterpret_cast<const breakpad_mach_segment_command *>(cmd);
227 
228         if (!strcmp(seg->segname, "__TEXT")) {
229           vmaddr_ = seg->vmaddr;
230           vmsize_ = seg->vmsize;
231           slide_ = 0;
232 
233           if (seg->fileoff == 0  &&  seg->filesize != 0) {
234             slide_ = (uintptr_t)GetLoadAddress() - (uintptr_t)seg->vmaddr;
235           }
236           foundTextSection = true;
237         }
238       }
239     }
240 
241     if (!foundDylibIDCommand) {
242       if (cmd->cmd == LC_ID_DYLIB) {
243         const struct dylib_command *dc =
244             reinterpret_cast<const struct dylib_command *>(cmd);
245 
246         version_ = dc->dylib.current_version;
247         foundDylibIDCommand = true;
248       }
249     }
250 
251     if (foundDylibIDCommand && foundTextSection) {
252       return;
253     }
254 
255     cmd = reinterpret_cast<const struct load_command *>
256       (reinterpret_cast<const char *>(cmd) + cmd->cmdsize);
257   }
258 
259 }
260 
Print()261 void DynamicImage::Print() {
262   const char *path = GetFilePath();
263   if (!path) {
264     path = "(unknown)";
265   }
266   printf("%p: %s\n", GetLoadAddress(), path);
267   breakpad_mach_header *header = GetMachHeader();
268   MachHeader(*header).Print();
269   printf("vmaddr\t\t: %p\n", reinterpret_cast<void*>(GetVMAddr()));
270   printf("vmsize\t\t: %llu\n", GetVMSize());
271   printf("slide\t\t: %td\n", GetVMAddrSlide());
272 }
273 
274 #pragma mark -
275 
276 //==============================================================================
277 // Loads information about dynamically loaded code in the given task.
DynamicImages(mach_port_t task)278 DynamicImages::DynamicImages(mach_port_t task)
279   : task_(task) {
280   ReadImageInfoForTask();
281 }
282 
GetDyldAllImageInfosPointer()283 void* DynamicImages::GetDyldAllImageInfosPointer()
284 {
285 
286   const char *imageSymbolName = "_dyld_all_image_infos";
287   const char *dyldPath = "/usr/lib/dyld";
288 #ifndef __LP64__
289   struct nlist l[8];
290   memset(l, 0, sizeof(l) );
291 
292   // First we lookup the address of the "_dyld_all_image_infos" struct
293   // which lives in "dyld".  This structure contains information about all
294   // of the loaded dynamic images.
295   struct nlist &list = l[0];
296   list.n_un.n_name = const_cast<char *>(imageSymbolName);
297   nlist(dyldPath,&list);
298   if(list.n_value) {
299     return reinterpret_cast<void*>(list.n_value);
300   }
301 
302   return NULL;
303 #else
304   struct nlist_64 l[8];
305   struct nlist_64 &list = l[0];
306 
307   memset(l, 0, sizeof(l) );
308 
309   const char *symbolNames[2] = { imageSymbolName, "\0" };
310 
311   int invalidEntriesCount = breakpad_nlist_64(dyldPath,&list,symbolNames);
312 
313   if(invalidEntriesCount != 0) {
314     return NULL;
315   }
316   assert(list.n_value);
317   return reinterpret_cast<void*>(list.n_value);
318 #endif
319 
320 }
321 //==============================================================================
322 // This code was written using dyld_debug.c (from Darwin) as a guide.
ReadImageInfoForTask()323 void DynamicImages::ReadImageInfoForTask() {
324   void *imageList = GetDyldAllImageInfosPointer();
325 
326   if (imageList) {
327     kern_return_t kr;
328     // Read the structure inside of dyld that contains information about
329     // loaded images.  We're reading from the desired task's address space.
330 
331     // Here we make the assumption that dyld loaded at the same address in
332     // the crashed process vs. this one.  This is an assumption made in
333     // "dyld_debug.c" and is said to be nearly always valid.
334     dyld_all_image_infos *dyldInfo = reinterpret_cast<dyld_all_image_infos*>
335       (ReadTaskMemory(task_,
336                       reinterpret_cast<void*>(imageList),
337                       sizeof(dyld_all_image_infos), &kr));
338 
339     if (dyldInfo) {
340       // number of loaded images
341       int count = dyldInfo->infoArrayCount;
342 
343       // Read an array of dyld_image_info structures each containing
344       // information about a loaded image.
345       dyld_image_info *infoArray = reinterpret_cast<dyld_image_info*>
346         (ReadTaskMemory(task_,
347                         dyldInfo->infoArray,
348                         count*sizeof(dyld_image_info), &kr));
349 
350       image_list_.reserve(count);
351 
352       for (int i = 0; i < count; ++i) {
353         dyld_image_info &info = infoArray[i];
354 
355         // First read just the mach_header from the image in the task.
356         breakpad_mach_header *header = reinterpret_cast<breakpad_mach_header*>
357           (ReadTaskMemory(task_,
358                           info.load_address_,
359                           sizeof(breakpad_mach_header), &kr));
360 
361         if (!header)
362           break;   // bail on this dynamic image
363 
364         // Now determine the total amount we really want to read based on the
365         // size of the load commands.  We need the header plus all of the
366         // load commands.
367         unsigned int header_size =
368             sizeof(breakpad_mach_header) + header->sizeofcmds;
369 
370         free(header);
371 
372         header = reinterpret_cast<breakpad_mach_header*>
373           (ReadTaskMemory(task_, info.load_address_, header_size, &kr));
374 
375         // Read the file name from the task's memory space.
376         char *file_path = NULL;
377         if (info.file_path_) {
378           // Although we're reading kMaxStringLength bytes, it's copied in the
379           // the DynamicImage constructor below with the correct string length,
380           // so it's not really wasting memory.
381           file_path = reinterpret_cast<char*>
382             (ReadTaskString(task_, info.file_path_));
383         }
384 
385         // Create an object representing this image and add it to our list.
386         DynamicImage *new_image;
387         new_image = new DynamicImage(header,
388                                      header_size,
389                                      (breakpad_mach_header*)info.load_address_,
390                                      file_path,
391                                      info.file_mod_date_,
392                                      task_);
393 
394         if (new_image->IsValid()) {
395           image_list_.push_back(DynamicImageRef(new_image));
396         } else {
397           delete new_image;
398         }
399 
400         if (file_path) {
401           free(file_path);
402         }
403       }
404 
405       free(dyldInfo);
406       free(infoArray);
407 
408       // sorts based on loading address
409       sort(image_list_.begin(), image_list_.end() );
410       // remove duplicates - this happens in certain strange cases
411       // You can see it in DashboardClient when Google Gadgets plugin
412       // is installed.  Apple's crash reporter log and gdb "info shared"
413       // both show the same library multiple times at the same address
414 
415       vector<DynamicImageRef>::iterator it = unique(image_list_.begin(),
416                                                     image_list_.end() );
417       image_list_.erase(it, image_list_.end());
418     }
419   }
420 }
421 
422 //==============================================================================
GetExecutableImage()423 DynamicImage  *DynamicImages::GetExecutableImage() {
424   int executable_index = GetExecutableImageIndex();
425 
426   if (executable_index >= 0) {
427     return GetImage(executable_index);
428   }
429 
430   return NULL;
431 }
432 
433 //==============================================================================
434 // returns -1 if failure to find executable
GetExecutableImageIndex()435 int DynamicImages::GetExecutableImageIndex() {
436   int image_count = GetImageCount();
437 
438   for (int i = 0; i < image_count; ++i) {
439     DynamicImage  *image = GetImage(i);
440     if (image->GetMachHeader()->filetype == MH_EXECUTE) {
441       return i;
442     }
443   }
444 
445   return -1;
446 }
447 
448 }  // namespace google_breakpad
449