1 // Copyright (c) 2007, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "mac/handler/dynamic_images.h"
31 
32 extern "C" { // needed to compile on Leopard
33   #include <mach-o/nlist.h>
34   #include <stdlib.h>
35   #include <stdio.h>
36 }
37 
38 #include <assert.h>
39 #include <AvailabilityMacros.h>
40 #include <dlfcn.h>
41 #include <mach/task_info.h>
42 #include <sys/sysctl.h>
43 #include <TargetConditionals.h>
44 #include <unistd.h>
45 
46 #include <algorithm>
47 #include <string>
48 #include <vector>
49 
50 #include "breakpad_nlist_64.h"
51 
52 #if !TARGET_OS_IPHONE
53 #include <CoreServices/CoreServices.h>
54 
55 #ifndef MAC_OS_X_VERSION_10_6
56 #define MAC_OS_X_VERSION_10_6 1060
57 #endif
58 
59 #if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6
60 
61 // Fallback declarations for TASK_DYLD_INFO and friends, introduced in
62 // <mach/task_info.h> in the Mac OS X 10.6 SDK.
63 #define TASK_DYLD_INFO 17
64 struct task_dyld_info {
65   mach_vm_address_t all_image_info_addr;
66   mach_vm_size_t all_image_info_size;
67 };
68 typedef struct task_dyld_info task_dyld_info_data_t;
69 typedef struct task_dyld_info *task_dyld_info_t;
70 #define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t))
71 
72 #endif
73 
74 #endif  // !TARGET_OS_IPHONE
75 
76 namespace google_breakpad {
77 
78 using std::string;
79 using std::vector;
80 
81 //==============================================================================
82 // Returns the size of the memory region containing |address| and the
83 // number of bytes from |address| to the end of the region.
84 // We potentially, will extend the size of the original
85 // region by the size of the following region if it's contiguous with the
86 // first in order to handle cases when we're reading strings and they
87 // straddle two vm regions.
88 //
GetMemoryRegionSize(task_port_t target_task,const uint64_t address,mach_vm_size_t * size_to_end)89 static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task,
90                                           const uint64_t address,
91                                           mach_vm_size_t *size_to_end) {
92   mach_vm_address_t region_base = (mach_vm_address_t)address;
93   mach_vm_size_t region_size;
94   natural_t nesting_level = 0;
95   vm_region_submap_info_64 submap_info;
96   mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64;
97 
98   // Get information about the vm region containing |address|
99   vm_region_recurse_info_t region_info;
100   region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info);
101 
102   kern_return_t result =
103     mach_vm_region_recurse(target_task,
104                            &region_base,
105                            &region_size,
106                            &nesting_level,
107                            region_info,
108                            &info_count);
109 
110   if (result == KERN_SUCCESS) {
111     // Get distance from |address| to the end of this region
112     *size_to_end = region_base + region_size -(mach_vm_address_t)address;
113 
114     // If we want to handle strings as long as 4096 characters we may need
115     // to check if there's a vm region immediately following the first one.
116     // If so, we need to extend |*size_to_end| to go all the way to the end
117     // of the second region.
118     if (*size_to_end < 4096) {
119       // Second region starts where the first one ends
120       mach_vm_address_t region_base2 =
121         (mach_vm_address_t)(region_base + region_size);
122       mach_vm_size_t region_size2;
123 
124       // Get information about the following vm region
125       result =
126         mach_vm_region_recurse(target_task,
127                                &region_base2,
128                                &region_size2,
129                                &nesting_level,
130                                region_info,
131                                &info_count);
132 
133       // Extend region_size to go all the way to the end of the 2nd region
134       if (result == KERN_SUCCESS
135           && region_base2 == region_base + region_size) {
136         region_size += region_size2;
137       }
138     }
139 
140     *size_to_end = region_base + region_size -(mach_vm_address_t)address;
141   } else {
142     region_size = 0;
143     *size_to_end = 0;
144   }
145 
146   return region_size;
147 }
148 
149 #define kMaxStringLength 8192
150 //==============================================================================
151 // Reads a NULL-terminated string from another task.
152 //
153 // Warning!  This will not read any strings longer than kMaxStringLength-1
154 //
ReadTaskString(task_port_t target_task,const uint64_t address)155 string ReadTaskString(task_port_t target_task,
156                       const uint64_t address) {
157   // The problem is we don't know how much to read until we know how long
158   // the string is. And we don't know how long the string is, until we've read
159   // the memory!  So, we'll try to read kMaxStringLength bytes
160   // (or as many bytes as we can until we reach the end of the vm region).
161   mach_vm_size_t size_to_end;
162   GetMemoryRegionSize(target_task, address, &size_to_end);
163 
164   if (size_to_end > 0) {
165     mach_vm_size_t size_to_read =
166       size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end;
167 
168     vector<uint8_t> bytes;
169     if (ReadTaskMemory(target_task, address, (size_t)size_to_read, bytes) !=
170         KERN_SUCCESS)
171       return string();
172 
173     return string(reinterpret_cast<const char*>(&bytes[0]));
174   }
175 
176   return string();
177 }
178 
179 //==============================================================================
180 // Reads an address range from another task. The bytes read will be returned
181 // in bytes, which will be resized as necessary.
ReadTaskMemory(task_port_t target_task,const uint64_t address,size_t length,vector<uint8_t> & bytes)182 kern_return_t ReadTaskMemory(task_port_t target_task,
183                              const uint64_t address,
184                              size_t length,
185                              vector<uint8_t> &bytes) {
186   int systemPageSize = getpagesize();
187 
188   // use the negative of the page size for the mask to find the page address
189   mach_vm_address_t page_address = address & (-systemPageSize);
190 
191   mach_vm_address_t last_page_address =
192       (address + length + (systemPageSize - 1)) & (-systemPageSize);
193 
194   mach_vm_size_t page_size = last_page_address - page_address;
195   uint8_t* local_start;
196   uint32_t local_length;
197 
198   kern_return_t r = mach_vm_read(target_task,
199                                  page_address,
200                                  page_size,
201                                  reinterpret_cast<vm_offset_t*>(&local_start),
202                                  &local_length);
203 
204   if (r != KERN_SUCCESS)
205     return r;
206 
207   bytes.resize(length);
208   memcpy(&bytes[0],
209          &local_start[(mach_vm_address_t)address - page_address],
210          length);
211   mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length);
212   return KERN_SUCCESS;
213 }
214 
215 #pragma mark -
216 
217 //==============================================================================
218 // Traits structs for specializing function templates to handle
219 // 32-bit/64-bit Mach-O files.
220 struct MachO32 {
221   typedef mach_header mach_header_type;
222   typedef segment_command mach_segment_command_type;
223   typedef dyld_image_info32 dyld_image_info;
224   typedef dyld_all_image_infos32 dyld_all_image_infos;
225   typedef section mach_section_type;
226   typedef struct nlist nlist_type;
227   static const uint32_t magic = MH_MAGIC;
228   static const uint32_t segment_load_command = LC_SEGMENT;
229 };
230 
231 struct MachO64 {
232   typedef mach_header_64 mach_header_type;
233   typedef segment_command_64 mach_segment_command_type;
234   typedef dyld_image_info64 dyld_image_info;
235   typedef dyld_all_image_infos64 dyld_all_image_infos;
236   typedef section_64 mach_section_type;
237   typedef struct nlist_64 nlist_type;
238   static const uint32_t magic = MH_MAGIC_64;
239   static const uint32_t segment_load_command = LC_SEGMENT_64;
240 };
241 
242 template<typename MachBits>
FindTextSection(DynamicImage & image)243 bool FindTextSection(DynamicImage& image) {
244   typedef typename MachBits::mach_header_type mach_header_type;
245   typedef typename MachBits::mach_segment_command_type
246       mach_segment_command_type;
247   typedef typename MachBits::mach_section_type mach_section_type;
248 
249   const mach_header_type* header =
250       reinterpret_cast<const mach_header_type*>(&image.header_[0]);
251 
252   if(header->magic != MachBits::magic) {
253     return false;
254   }
255 
256   bool is_in_shared_cache = ((header->flags & MH_SHAREDCACHE) != 0);
257   if (is_in_shared_cache) {
258     image.slide_ = image.shared_cache_slide_;
259   }
260 
261   const struct load_command *cmd =
262       reinterpret_cast<const struct load_command *>(header + 1);
263 
264   bool retval = false;
265 
266   uint32_t num_data_sections = 0;
267   const mach_section_type *data_sections = NULL;
268   bool found_text_section = false;
269   bool found_dylib_id_command = false;
270   for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) {
271     if (!data_sections) {
272       if (cmd->cmd == MachBits::segment_load_command) {
273         const mach_segment_command_type *seg =
274           reinterpret_cast<const mach_segment_command_type *>(cmd);
275 
276         if (!strcmp(seg->segname, "__DATA")) {
277           num_data_sections = seg->nsects;
278           data_sections = reinterpret_cast<const mach_section_type *>(seg + 1);
279         }
280       }
281     }
282 
283     if (!found_text_section) {
284       if (cmd->cmd == MachBits::segment_load_command) {
285         const mach_segment_command_type *seg =
286             reinterpret_cast<const mach_segment_command_type *>(cmd);
287 
288         if (!is_in_shared_cache) {
289           if (seg->fileoff == 0 && seg->filesize != 0) {
290             image.slide_ =
291               (uintptr_t)image.GetLoadAddress() - (uintptr_t)seg->vmaddr;
292           }
293         }
294 
295         if (!strcmp(seg->segname, "__TEXT")) {
296           image.vmaddr_ = static_cast<mach_vm_address_t>(seg->vmaddr);
297           image.vmsize_ = static_cast<mach_vm_size_t>(seg->vmsize);
298           found_text_section = true;
299         }
300       }
301     }
302 
303     if (!found_dylib_id_command) {
304       if (cmd->cmd == LC_ID_DYLIB) {
305         const struct dylib_command *dc =
306             reinterpret_cast<const struct dylib_command *>(cmd);
307 
308         image.version_ = dc->dylib.current_version;
309         found_dylib_id_command = true;
310       }
311     }
312 
313     if (found_dylib_id_command && found_text_section && data_sections) {
314       break;
315     }
316 
317     cmd = reinterpret_cast<const struct load_command *>
318         (reinterpret_cast<const char *>(cmd) + cmd->cmdsize);
319   }
320 
321   if (found_dylib_id_command && found_text_section) {
322     retval = true;
323   }
324 
325   // The __DYLD,__crash_info section may not be accessible in child process
326   // modules that aren't in the dyld shared cache.
327   if (is_in_shared_cache) {
328     for (unsigned int i = 0; i < num_data_sections; ++i) {
329       if (!strcmp(data_sections[i].sectname, "__crash_info")) {
330         ReadTaskMemory(image.task_,
331                        data_sections[i].addr + image.slide_,
332                        data_sections[i].size,
333                        image.crash_info_);
334         break;
335       }
336     }
337   }
338 
339   return retval;
340 }
341 
342 //==============================================================================
343 // Initializes vmaddr_, vmsize_, and slide_
CalculateMemoryAndVersionInfo()344 void DynamicImage::CalculateMemoryAndVersionInfo() {
345   // unless we can process the header, ensure that calls to
346   // IsValid() will return false
347   vmaddr_ = 0;
348   vmsize_ = 0;
349   slide_ = 0;
350   version_ = 0;
351 
352   // The function template above does all the real work.
353   if (Is64Bit())
354     FindTextSection<MachO64>(*this);
355   else
356     FindTextSection<MachO32>(*this);
357 }
358 
359 //==============================================================================
360 // The helper function template abstracts the 32/64-bit differences.
361 template<typename MachBits>
GetFileTypeFromHeader(DynamicImage & image)362 uint32_t GetFileTypeFromHeader(DynamicImage& image) {
363   typedef typename MachBits::mach_header_type mach_header_type;
364 
365   const mach_header_type* header =
366       reinterpret_cast<const mach_header_type*>(&image.header_[0]);
367   return header->filetype;
368 }
369 
GetFileType()370 uint32_t DynamicImage::GetFileType() {
371   if (Is64Bit())
372     return GetFileTypeFromHeader<MachO64>(*this);
373 
374   return GetFileTypeFromHeader<MachO32>(*this);
375 }
376 
377 #pragma mark -
378 
379 //==============================================================================
380 // Loads information about dynamically loaded code in the given task.
DynamicImages(mach_port_t task)381 DynamicImages::DynamicImages(mach_port_t task)
382     : task_(task),
383       cpu_type_(DetermineTaskCPUType(task)),
384       image_list_() {
385   ReadImageInfoForTask();
386 }
387 
388 template<typename MachBits>
LookupSymbol(const char * symbol_name,const char * filename,cpu_type_t cpu_type)389 static uint64_t LookupSymbol(const char* symbol_name,
390                              const char* filename,
391                              cpu_type_t cpu_type) {
392   typedef typename MachBits::nlist_type nlist_type;
393 
394   nlist_type symbol_info[8] = {};
395   const char *symbolNames[2] = { symbol_name, "\0" };
396   nlist_type &list = symbol_info[0];
397   int invalidEntriesCount = breakpad_nlist(filename,
398                                            &list,
399                                            symbolNames,
400                                            cpu_type);
401 
402   if(invalidEntriesCount != 0) {
403     return 0;
404   }
405 
406   assert(list.n_value);
407   return list.n_value;
408 }
409 
GetDyldAllImageInfosPointer()410 uint64_t DynamicImages::GetDyldAllImageInfosPointer() {
411   task_dyld_info_data_t task_dyld_info;
412   mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
413   if (task_info(task_, TASK_DYLD_INFO, (task_info_t)&task_dyld_info,
414                 &count) != KERN_SUCCESS) {
415     return 0;
416   }
417 
418   return (uint64_t)task_dyld_info.all_image_info_addr;
419 }
420 
421 //==============================================================================
422 // This code was written using dyld_debug.c (from Darwin) as a guide.
423 
424 template<typename MachBits>
ReadImageInfo(DynamicImages & images,uint64_t image_list_address)425 void ReadImageInfo(DynamicImages& images,
426                    uint64_t image_list_address) {
427   typedef typename MachBits::dyld_image_info dyld_image_info;
428   typedef typename MachBits::dyld_all_image_infos dyld_all_image_infos;
429   typedef typename MachBits::mach_header_type mach_header_type;
430 
431   // Read the structure inside of dyld that contains information about
432   // loaded images.  We're reading from the desired task's address space.
433 
434   // Here we make the assumption that dyld loaded at the same address in
435   // the crashed process vs. this one.  This is an assumption made in
436   // "dyld_debug.c" and is said to be nearly always valid.
437   vector<uint8_t> dyld_all_info_bytes;
438   if (ReadTaskMemory(images.task_,
439                      image_list_address,
440                      sizeof(dyld_all_image_infos),
441                      dyld_all_info_bytes) != KERN_SUCCESS)
442     return;
443 
444   dyld_all_image_infos *dyldInfo =
445     reinterpret_cast<dyld_all_image_infos*>(&dyld_all_info_bytes[0]);
446 
447   // number of loaded images
448   int count = dyldInfo->infoArrayCount;
449 
450   // Read an array of dyld_image_info structures each containing
451   // information about a loaded image.
452   vector<uint8_t> dyld_info_array_bytes;
453     if (ReadTaskMemory(images.task_,
454                        dyldInfo->infoArray,
455                        count * sizeof(dyld_image_info),
456                        dyld_info_array_bytes) != KERN_SUCCESS)
457       return;
458 
459     dyld_image_info *infoArray =
460         reinterpret_cast<dyld_image_info*>(&dyld_info_array_bytes[0]);
461     images.image_list_.reserve(count);
462 
463     for (int i = 0; i < count; ++i) {
464       dyld_image_info &info = infoArray[i];
465 
466       // First read just the mach_header from the image in the task.
467       vector<uint8_t> mach_header_bytes;
468       if (ReadTaskMemory(images.task_,
469                          info.load_address_,
470                          sizeof(mach_header_type),
471                          mach_header_bytes) != KERN_SUCCESS)
472         continue;  // bail on this dynamic image
473 
474       mach_header_type *header =
475           reinterpret_cast<mach_header_type*>(&mach_header_bytes[0]);
476 
477       cpu_subtype_t cpusubtype = (header->cpusubtype & ~CPU_SUBTYPE_MASK);
478 
479       // Now determine the total amount necessary to read the header
480       // plus all of the load commands.
481       size_t header_size =
482           sizeof(mach_header_type) + header->sizeofcmds;
483 
484       if (ReadTaskMemory(images.task_,
485                          info.load_address_,
486                          header_size,
487                          mach_header_bytes) != KERN_SUCCESS)
488         continue;
489 
490       // Read the file name from the task's memory space.
491       string file_path;
492       if (info.file_path_) {
493         // Although we're reading kMaxStringLength bytes, it's copied in the
494         // the DynamicImage constructor below with the correct string length,
495         // so it's not really wasting memory.
496         file_path = ReadTaskString(images.task_, info.file_path_);
497       }
498 
499       // Create an object representing this image and add it to our list.
500       DynamicImage *new_image;
501       new_image = new DynamicImage(&mach_header_bytes[0],
502                                    header_size,
503                                    info.load_address_,
504                                    file_path,
505                                    static_cast<uintptr_t>(info.file_mod_date_),
506                                    images.task_,
507                                    images.cpu_type_,
508                                    cpusubtype,
509                                    dyldInfo->sharedCacheSlide);
510 
511       if (new_image->IsValid()) {
512         images.image_list_.push_back(DynamicImageRef(new_image));
513       } else {
514         delete new_image;
515       }
516     }
517 
518     // sorts based on loading address
519     sort(images.image_list_.begin(), images.image_list_.end());
520     // remove duplicates - this happens in certain strange cases
521     // You can see it in DashboardClient when Google Gadgets plugin
522     // is installed.  Apple's crash reporter log and gdb "info shared"
523     // both show the same library multiple times at the same address
524 
525     vector<DynamicImageRef>::iterator it = unique(images.image_list_.begin(),
526                                                   images.image_list_.end());
527     images.image_list_.erase(it, images.image_list_.end());
528 }
529 
ReadImageInfoForTask()530 void DynamicImages::ReadImageInfoForTask() {
531   uint64_t imageList = GetDyldAllImageInfosPointer();
532 
533   if (imageList) {
534     if (Is64Bit())
535       ReadImageInfo<MachO64>(*this, imageList);
536     else
537       ReadImageInfo<MachO32>(*this, imageList);
538   }
539 }
540 
541 //==============================================================================
GetExecutableImage()542 DynamicImage  *DynamicImages::GetExecutableImage() {
543   int executable_index = GetExecutableImageIndex();
544 
545   if (executable_index >= 0) {
546     return GetImage(executable_index);
547   }
548 
549   return NULL;
550 }
551 
552 //==============================================================================
553 // returns -1 if failure to find executable
GetExecutableImageIndex()554 int DynamicImages::GetExecutableImageIndex() {
555   int image_count = GetImageCount();
556 
557   for (int i = 0; i < image_count; ++i) {
558     DynamicImage  *image = GetImage(i);
559     if (image->GetFileType() == MH_EXECUTE) {
560       return i;
561     }
562   }
563 
564   return -1;
565 }
566 
567 //==============================================================================
568 // static
DetermineTaskCPUType(task_t task)569 cpu_type_t DynamicImages::DetermineTaskCPUType(task_t task) {
570   if (task == mach_task_self())
571     return GetNativeCPUType();
572 
573   int mib[CTL_MAXNAME];
574   size_t mibLen = CTL_MAXNAME;
575   int err = sysctlnametomib("sysctl.proc_cputype", mib, &mibLen);
576   if (err == 0) {
577     assert(mibLen < CTL_MAXNAME);
578     pid_for_task(task, &mib[mibLen]);
579     mibLen += 1;
580 
581     cpu_type_t cpu_type;
582     size_t cpuTypeSize = sizeof(cpu_type);
583     sysctl(mib, static_cast<u_int>(mibLen), &cpu_type, &cpuTypeSize, 0, 0);
584     return cpu_type;
585   }
586 
587   return GetNativeCPUType();
588 }
589 
590 }  // namespace google_breakpad
591