1 // Copyright (c) 2007, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 extern "C" { // needed to compile on Leopard
31 #include <mach-o/nlist.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34 }
35
36 #include "breakpad_nlist_64.h"
37 #include <dlfcn.h>
38 #include <mach/mach_vm.h>
39 #include <algorithm>
40 #include "client/mac/handler/dynamic_images.h"
41
42 namespace google_breakpad {
43
44 //==============================================================================
45 // Returns the size of the memory region containing |address| and the
46 // number of bytes from |address| to the end of the region.
47 // We potentially, will extend the size of the original
48 // region by the size of the following region if it's contiguous with the
49 // first in order to handle cases when we're reading strings and they
50 // straddle two vm regions.
51 //
GetMemoryRegionSize(task_port_t target_task,const void * address,mach_vm_size_t * size_to_end)52 static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task,
53 const void* address,
54 mach_vm_size_t *size_to_end) {
55 mach_vm_address_t region_base = (mach_vm_address_t)address;
56 mach_vm_size_t region_size;
57 natural_t nesting_level = 0;
58 vm_region_submap_info_64 submap_info;
59 mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64;
60
61 // Get information about the vm region containing |address|
62 vm_region_recurse_info_t region_info;
63 region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info);
64
65 kern_return_t result =
66 mach_vm_region_recurse(target_task,
67 ®ion_base,
68 ®ion_size,
69 &nesting_level,
70 region_info,
71 &info_count);
72
73 if (result == KERN_SUCCESS) {
74 // Get distance from |address| to the end of this region
75 *size_to_end = region_base + region_size -(mach_vm_address_t)address;
76
77 // If we want to handle strings as long as 4096 characters we may need
78 // to check if there's a vm region immediately following the first one.
79 // If so, we need to extend |*size_to_end| to go all the way to the end
80 // of the second region.
81 if (*size_to_end < 4096) {
82 // Second region starts where the first one ends
83 mach_vm_address_t region_base2 =
84 (mach_vm_address_t)(region_base + region_size);
85 mach_vm_size_t region_size2;
86
87 // Get information about the following vm region
88 result =
89 mach_vm_region_recurse(target_task,
90 ®ion_base2,
91 ®ion_size2,
92 &nesting_level,
93 region_info,
94 &info_count);
95
96 // Extend region_size to go all the way to the end of the 2nd region
97 if (result == KERN_SUCCESS
98 && region_base2 == region_base + region_size) {
99 region_size += region_size2;
100 }
101 }
102
103 *size_to_end = region_base + region_size -(mach_vm_address_t)address;
104 } else {
105 region_size = 0;
106 *size_to_end = 0;
107 }
108
109 return region_size;
110 }
111
112 #define kMaxStringLength 8192
113 //==============================================================================
114 // Reads a NULL-terminated string from another task.
115 //
116 // Warning! This will not read any strings longer than kMaxStringLength-1
117 //
ReadTaskString(task_port_t target_task,const void * address)118 static void* ReadTaskString(task_port_t target_task,
119 const void* address) {
120 // The problem is we don't know how much to read until we know how long
121 // the string is. And we don't know how long the string is, until we've read
122 // the memory! So, we'll try to read kMaxStringLength bytes
123 // (or as many bytes as we can until we reach the end of the vm region).
124 mach_vm_size_t size_to_end;
125 GetMemoryRegionSize(target_task, address, &size_to_end);
126
127 if (size_to_end > 0) {
128 mach_vm_size_t size_to_read =
129 size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end;
130
131 kern_return_t kr;
132 return ReadTaskMemory(target_task, address, size_to_read, &kr);
133 }
134
135 return NULL;
136 }
137
138 //==============================================================================
139 // Reads an address range from another task. A block of memory is malloced
140 // and should be freed by the caller.
ReadTaskMemory(task_port_t target_task,const void * address,size_t length,kern_return_t * kr)141 void* ReadTaskMemory(task_port_t target_task,
142 const void* address,
143 size_t length,
144 kern_return_t *kr) {
145 void* result = NULL;
146 int systemPageSize = getpagesize();
147
148 // use the negative of the page size for the mask to find the page address
149 mach_vm_address_t page_address =
150 reinterpret_cast<mach_vm_address_t>(address) & (-systemPageSize);
151
152 mach_vm_address_t last_page_address =
153 (reinterpret_cast<mach_vm_address_t>(address) + length +
154 (systemPageSize - 1)) & (-systemPageSize);
155
156 mach_vm_size_t page_size = last_page_address - page_address;
157 uint8_t* local_start;
158 uint32_t local_length;
159
160 kern_return_t r;
161
162 r = mach_vm_read(target_task,
163 page_address,
164 page_size,
165 reinterpret_cast<vm_offset_t*>(&local_start),
166 &local_length);
167
168
169 if (kr != NULL) {
170 *kr = r;
171 }
172
173 if (r == KERN_SUCCESS) {
174 result = malloc(length);
175 if (result != NULL) {
176 memcpy(result,
177 &local_start[(mach_vm_address_t)address - page_address],
178 length);
179 }
180 mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length);
181 }
182
183 return result;
184 }
185
186 #pragma mark -
187
188 //==============================================================================
189 // Initializes vmaddr_, vmsize_, and slide_
CalculateMemoryAndVersionInfo()190 void DynamicImage::CalculateMemoryAndVersionInfo() {
191 breakpad_mach_header *header = GetMachHeader();
192
193 // unless we can process the header, ensure that calls to
194 // IsValid() will return false
195 vmaddr_ = 0;
196 vmsize_ = 0;
197 slide_ = 0;
198 version_ = 0;
199
200 bool foundTextSection = false;
201 bool foundDylibIDCommand = false;
202
203 #if __LP64__
204 if(header->magic != MH_MAGIC_64) {
205 return;
206 }
207 #else
208 if(header->magic != MH_MAGIC) {
209 return;
210 }
211 #endif
212
213 #ifdef __LP64__
214 const uint32_t segmentLoadCommand = LC_SEGMENT_64;
215 #else
216 const uint32_t segmentLoadCommand = LC_SEGMENT;
217 #endif
218
219 const struct load_command *cmd =
220 reinterpret_cast<const struct load_command *>(header + 1);
221
222 for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) {
223 if (!foundTextSection) {
224 if (cmd->cmd == segmentLoadCommand) {
225 const breakpad_mach_segment_command *seg =
226 reinterpret_cast<const breakpad_mach_segment_command *>(cmd);
227
228 if (!strcmp(seg->segname, "__TEXT")) {
229 vmaddr_ = seg->vmaddr;
230 vmsize_ = seg->vmsize;
231 slide_ = 0;
232
233 if (seg->fileoff == 0 && seg->filesize != 0) {
234 slide_ = (uintptr_t)GetLoadAddress() - (uintptr_t)seg->vmaddr;
235 }
236 foundTextSection = true;
237 }
238 }
239 }
240
241 if (!foundDylibIDCommand) {
242 if (cmd->cmd == LC_ID_DYLIB) {
243 const struct dylib_command *dc =
244 reinterpret_cast<const struct dylib_command *>(cmd);
245
246 version_ = dc->dylib.current_version;
247 foundDylibIDCommand = true;
248 }
249 }
250
251 if (foundDylibIDCommand && foundTextSection) {
252 return;
253 }
254
255 cmd = reinterpret_cast<const struct load_command *>
256 (reinterpret_cast<const char *>(cmd) + cmd->cmdsize);
257 }
258
259 }
260
Print()261 void DynamicImage::Print() {
262 const char *path = GetFilePath();
263 if (!path) {
264 path = "(unknown)";
265 }
266 printf("%p: %s\n", GetLoadAddress(), path);
267 breakpad_mach_header *header = GetMachHeader();
268 MachHeader(*header).Print();
269 printf("vmaddr\t\t: %p\n", reinterpret_cast<void*>(GetVMAddr()));
270 printf("vmsize\t\t: %llu\n", GetVMSize());
271 printf("slide\t\t: %td\n", GetVMAddrSlide());
272 }
273
274 #pragma mark -
275
276 //==============================================================================
277 // Loads information about dynamically loaded code in the given task.
DynamicImages(mach_port_t task)278 DynamicImages::DynamicImages(mach_port_t task)
279 : task_(task) {
280 ReadImageInfoForTask();
281 }
282
GetDyldAllImageInfosPointer()283 void* DynamicImages::GetDyldAllImageInfosPointer()
284 {
285
286 const char *imageSymbolName = "_dyld_all_image_infos";
287 const char *dyldPath = "/usr/lib/dyld";
288 #ifndef __LP64__
289 struct nlist l[8];
290 memset(l, 0, sizeof(l) );
291
292 // First we lookup the address of the "_dyld_all_image_infos" struct
293 // which lives in "dyld". This structure contains information about all
294 // of the loaded dynamic images.
295 struct nlist &list = l[0];
296 list.n_un.n_name = const_cast<char *>(imageSymbolName);
297 nlist(dyldPath,&list);
298 if(list.n_value) {
299 return reinterpret_cast<void*>(list.n_value);
300 }
301
302 return NULL;
303 #else
304 struct nlist_64 l[8];
305 struct nlist_64 &list = l[0];
306
307 memset(l, 0, sizeof(l) );
308
309 const char *symbolNames[2] = { imageSymbolName, "\0" };
310
311 int invalidEntriesCount = breakpad_nlist_64(dyldPath,&list,symbolNames);
312
313 if(invalidEntriesCount != 0) {
314 return NULL;
315 }
316 assert(list.n_value);
317 return reinterpret_cast<void*>(list.n_value);
318 #endif
319
320 }
321 //==============================================================================
322 // This code was written using dyld_debug.c (from Darwin) as a guide.
ReadImageInfoForTask()323 void DynamicImages::ReadImageInfoForTask() {
324 void *imageList = GetDyldAllImageInfosPointer();
325
326 if (imageList) {
327 kern_return_t kr;
328 // Read the structure inside of dyld that contains information about
329 // loaded images. We're reading from the desired task's address space.
330
331 // Here we make the assumption that dyld loaded at the same address in
332 // the crashed process vs. this one. This is an assumption made in
333 // "dyld_debug.c" and is said to be nearly always valid.
334 dyld_all_image_infos *dyldInfo = reinterpret_cast<dyld_all_image_infos*>
335 (ReadTaskMemory(task_,
336 reinterpret_cast<void*>(imageList),
337 sizeof(dyld_all_image_infos), &kr));
338
339 if (dyldInfo) {
340 // number of loaded images
341 int count = dyldInfo->infoArrayCount;
342
343 // Read an array of dyld_image_info structures each containing
344 // information about a loaded image.
345 dyld_image_info *infoArray = reinterpret_cast<dyld_image_info*>
346 (ReadTaskMemory(task_,
347 dyldInfo->infoArray,
348 count*sizeof(dyld_image_info), &kr));
349
350 image_list_.reserve(count);
351
352 for (int i = 0; i < count; ++i) {
353 dyld_image_info &info = infoArray[i];
354
355 // First read just the mach_header from the image in the task.
356 breakpad_mach_header *header = reinterpret_cast<breakpad_mach_header*>
357 (ReadTaskMemory(task_,
358 info.load_address_,
359 sizeof(breakpad_mach_header), &kr));
360
361 if (!header)
362 break; // bail on this dynamic image
363
364 // Now determine the total amount we really want to read based on the
365 // size of the load commands. We need the header plus all of the
366 // load commands.
367 unsigned int header_size =
368 sizeof(breakpad_mach_header) + header->sizeofcmds;
369
370 free(header);
371
372 header = reinterpret_cast<breakpad_mach_header*>
373 (ReadTaskMemory(task_, info.load_address_, header_size, &kr));
374
375 // Read the file name from the task's memory space.
376 char *file_path = NULL;
377 if (info.file_path_) {
378 // Although we're reading kMaxStringLength bytes, it's copied in the
379 // the DynamicImage constructor below with the correct string length,
380 // so it's not really wasting memory.
381 file_path = reinterpret_cast<char*>
382 (ReadTaskString(task_, info.file_path_));
383 }
384
385 // Create an object representing this image and add it to our list.
386 DynamicImage *new_image;
387 new_image = new DynamicImage(header,
388 header_size,
389 (breakpad_mach_header*)info.load_address_,
390 file_path,
391 info.file_mod_date_,
392 task_);
393
394 if (new_image->IsValid()) {
395 image_list_.push_back(DynamicImageRef(new_image));
396 } else {
397 delete new_image;
398 }
399
400 if (file_path) {
401 free(file_path);
402 }
403 }
404
405 free(dyldInfo);
406 free(infoArray);
407
408 // sorts based on loading address
409 sort(image_list_.begin(), image_list_.end() );
410 // remove duplicates - this happens in certain strange cases
411 // You can see it in DashboardClient when Google Gadgets plugin
412 // is installed. Apple's crash reporter log and gdb "info shared"
413 // both show the same library multiple times at the same address
414
415 vector<DynamicImageRef>::iterator it = unique(image_list_.begin(),
416 image_list_.end() );
417 image_list_.erase(it, image_list_.end());
418 }
419 }
420 }
421
422 //==============================================================================
GetExecutableImage()423 DynamicImage *DynamicImages::GetExecutableImage() {
424 int executable_index = GetExecutableImageIndex();
425
426 if (executable_index >= 0) {
427 return GetImage(executable_index);
428 }
429
430 return NULL;
431 }
432
433 //==============================================================================
434 // returns -1 if failure to find executable
GetExecutableImageIndex()435 int DynamicImages::GetExecutableImageIndex() {
436 int image_count = GetImageCount();
437
438 for (int i = 0; i < image_count; ++i) {
439 DynamicImage *image = GetImage(i);
440 if (image->GetMachHeader()->filetype == MH_EXECUTE) {
441 return i;
442 }
443 }
444
445 return -1;
446 }
447
448 } // namespace google_breakpad
449