// Copyright 2014 The Crashpad Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "snapshot/mac/process_reader_mac.h"

#include <Availability.h>
#include <mach-o/loader.h>
#include <mach/mach_vm.h>

#include <algorithm>
#include <utility>

#include "base/logging.h"
#include "base/mac/mach_logging.h"
#include "base/mac/scoped_mach_port.h"
#include "base/mac/scoped_mach_vm.h"
#include "base/strings/stringprintf.h"
#include "snapshot/mac/mach_o_image_reader.h"
#include "snapshot/mac/process_types.h"
#include "util/misc/scoped_forbid_return.h"

namespace {

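// Converts a Mach time_value to a POSIX timeval.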
void MachTimeValueToTimeval(const time_value& mach, timeval* tv) {
  tv->tv_sec = mach.seconds;
  tv->tv_usec = mach.microseconds;
}

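// Wraps mach_vm_region_recurse(), descending into submaps until a terminal
// (non-submap) region is reached, and reports that region’s protection and
// user tag.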
kern_return_t MachVMRegionRecurseDeepest(task_t task,
                                         mach_vm_address_t* address,
                                         mach_vm_size_t* size,
                                         natural_t* depth,
                                         vm_prot_t* protection,
                                         unsigned int* user_tag) {
  vm_region_submap_short_info_64 submap_info;
  mach_msg_type_number_t count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
  while (true) {
    kern_return_t kr = mach_vm_region_recurse(
        task,
        address,
        size,
        depth,
        reinterpret_cast<vm_region_recurse_info_t>(&submap_info),
        &count);
    if (kr != KERN_SUCCESS) {
      return kr;
    }

    if (!submap_info.is_submap) {
      *protection = submap_info.protection;
      *user_tag = submap_info.user_tag;
      return KERN_SUCCESS;
    }

    ++*depth;
  }
}

}  // namespace

namespace crashpad {

ProcessReaderMac::Thread::Thread()
    : thread_context(),
      float_context(),
      debug_context(),
      id(0),
      stack_region_address(0),
      stack_region_size(0),
      thread_specific_data_address(0),
      port(THREAD_NULL),
      suspend_count(0),
      priority(0) {}

ProcessReaderMac::Module::Module() : name(), reader(nullptr), timestamp(0) {}

ProcessReaderMac::Module::~Module() {}

ProcessReaderMac::ProcessReaderMac()
    : process_info_(),
      threads_(),
      modules_(),
      module_readers_(),
      process_memory_(),
      task_(TASK_NULL),
      initialized_(),
#if defined(CRASHPAD_MAC_32_BIT_SUPPORT)
      is_64_bit_(false),
#endif  // CRASHPAD_MAC_32_BIT_SUPPORT
      initialized_threads_(false),
      initialized_modules_(false) {
}

ProcessReaderMac::~ProcessReaderMac() {
  for (const Thread& thread : threads_) {
    kern_return_t kr = mach_port_deallocate(mach_task_self(), thread.port);
    MACH_LOG_IF(ERROR, kr != KERN_SUCCESS, kr) << "mach_port_deallocate";
  }
}

bool ProcessReaderMac::Initialize(task_t task) {
  INITIALIZATION_STATE_SET_INITIALIZING(initialized_);

  if (!process_info_.InitializeWithTask(task)) {
    return false;
  }

  if (!process_memory_.Initialize(task)) {
    return false;
  }

#if defined(CRASHPAD_MAC_32_BIT_SUPPORT)
  is_64_bit_ = process_info_.Is64Bit();
#else  // CRASHPAD_MAC_32_BIT_SUPPORT
  DCHECK(process_info_.Is64Bit());
#endif  // CRASHPAD_MAC_32_BIT_SUPPORT

  task_ = task;

  INITIALIZATION_STATE_SET_VALID(initialized_);
  return true;
}

void ProcessReaderMac::StartTime(timeval* start_time) const {
  bool rv = process_info_.StartTime(start_time);
  DCHECK(rv);
}

bool ProcessReaderMac::CPUTimes(timeval* user_time,
                                timeval* system_time) const {
  INITIALIZATION_STATE_DCHECK_VALID(initialized_);

  // Calculate user and system time the same way the kernel does for
  // getrusage(). See 10.9.2 xnu-2422.90.20/bsd/kern/kern_resource.c calcru().
  timerclear(user_time);
  timerclear(system_time);

  // As of the 10.8 SDK, the preferred task_info() flavor is
  // MACH_TASK_BASIC_INFO. TASK_BASIC_INFO_64 is equivalent and works on
  // earlier systems.
  task_basic_info_64 task_basic_info;
  mach_msg_type_number_t task_basic_info_count = TASK_BASIC_INFO_64_COUNT;
  kern_return_t kr = task_info(task_,
                               TASK_BASIC_INFO_64,
                               reinterpret_cast<task_info_t>(&task_basic_info),
                               &task_basic_info_count);
  if (kr != KERN_SUCCESS) {
    MACH_LOG(WARNING, kr) << "task_info TASK_BASIC_INFO_64";
    return false;
  }

  task_thread_times_info_data_t task_thread_times;
  mach_msg_type_number_t task_thread_times_count = TASK_THREAD_TIMES_INFO_COUNT;
  kr = task_info(task_,
                 TASK_THREAD_TIMES_INFO,
                 reinterpret_cast<task_info_t>(&task_thread_times),
                 &task_thread_times_count);
  if (kr != KERN_SUCCESS) {
    MACH_LOG(WARNING, kr) << "task_info TASK_THREAD_TIMES";
    return false;
  }

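  // task_basic_info_64 reports time accumulated by threads that have already
  // terminated, while task_thread_times covers the threads still running, so
  // the per-process totals are the sum of the two.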
  MachTimeValueToTimeval(task_basic_info.user_time, user_time);
  MachTimeValueToTimeval(task_basic_info.system_time, system_time);

  timeval thread_user_time;
  MachTimeValueToTimeval(task_thread_times.user_time, &thread_user_time);
  timeval thread_system_time;
  MachTimeValueToTimeval(task_thread_times.system_time, &thread_system_time);

  timeradd(user_time, &thread_user_time, user_time);
  timeradd(system_time, &thread_system_time, system_time);

  return true;
}

const std::vector<ProcessReaderMac::Thread>& ProcessReaderMac::Threads() {
  INITIALIZATION_STATE_DCHECK_VALID(initialized_);

  if (!initialized_threads_) {
    InitializeThreads();
  }

  return threads_;
}

const std::vector<ProcessReaderMac::Module>& ProcessReaderMac::Modules() {
  INITIALIZATION_STATE_DCHECK_VALID(initialized_);

  if (!initialized_modules_) {
    InitializeModules();
  }

  return modules_;
}

mach_vm_address_t ProcessReaderMac::DyldAllImageInfo(
    mach_vm_size_t* all_image_info_size) {
  INITIALIZATION_STATE_DCHECK_VALID(initialized_);

  task_dyld_info_data_t dyld_info;
  mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
  kern_return_t kr = task_info(
      task_, TASK_DYLD_INFO, reinterpret_cast<task_info_t>(&dyld_info), &count);
  if (kr != KERN_SUCCESS) {
    MACH_LOG(WARNING, kr) << "task_info";
    return 0;
  }

// TODO(mark): Deal with statically linked executables which don’t use dyld.
// This may look for the module that matches the executable path in the same
// data set that vmmap uses.

#if __MAC_OS_X_VERSION_MAX_ALLOWED >= __MAC_10_7
  // The task_dyld_info_data_t struct grew in 10.7, adding the format field.
  // Don’t check this field if it’s not present, which can happen when either
  // the SDK used at compile time or the kernel at run time is too old and
  // doesn’t know about it.
  if (count >= TASK_DYLD_INFO_COUNT) {
    const integer_t kExpectedFormat =
        !Is64Bit() ? TASK_DYLD_ALL_IMAGE_INFO_32 : TASK_DYLD_ALL_IMAGE_INFO_64;
    if (dyld_info.all_image_info_format != kExpectedFormat) {
      LOG(WARNING) << "unexpected task_dyld_info_data_t::all_image_info_format "
                   << dyld_info.all_image_info_format;
      DCHECK_EQ(dyld_info.all_image_info_format, kExpectedFormat);
      return 0;
    }
  }
#endif

  if (all_image_info_size) {
    *all_image_info_size = dyld_info.all_image_info_size;
  }
  return dyld_info.all_image_info_addr;
}

void ProcessReaderMac::InitializeThreads() {
  DCHECK(!initialized_threads_);
  DCHECK(threads_.empty());

  initialized_threads_ = true;

  thread_act_array_t threads;
  mach_msg_type_number_t thread_count = 0;
  kern_return_t kr = task_threads(task_, &threads, &thread_count);
  if (kr != KERN_SUCCESS) {
    MACH_LOG(WARNING, kr) << "task_threads";
    return;
  }

  // The send rights in the |threads| array aren’t managed by anything until
  // they’re added to |threads_| by the loop below. Any early return (or
  // exception) that happens between here and the completion of the loop below
  // will leak thread port send rights.
  ScopedForbidReturn threads_need_owners;

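  // task_threads() allocates the |threads| array in this process’ address
  // space; ScopedMachVM deallocates it when this function returns.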
  base::mac::ScopedMachVM threads_vm(
      reinterpret_cast<vm_address_t>(threads),
      mach_vm_round_page(thread_count * sizeof(*threads)));

  for (size_t index = 0; index < thread_count; ++index) {
    Thread thread;
    thread.port = threads[index];

#if defined(ARCH_CPU_X86_FAMILY)
    const thread_state_flavor_t kThreadStateFlavor =
        Is64Bit() ? x86_THREAD_STATE64 : x86_THREAD_STATE32;
    mach_msg_type_number_t thread_state_count =
        Is64Bit() ? x86_THREAD_STATE64_COUNT : x86_THREAD_STATE32_COUNT;

    // TODO(mark): Use the AVX variants instead of the FLOAT variants?
    const thread_state_flavor_t kFloatStateFlavor =
        Is64Bit() ? x86_FLOAT_STATE64 : x86_FLOAT_STATE32;
    mach_msg_type_number_t float_state_count =
        Is64Bit() ? x86_FLOAT_STATE64_COUNT : x86_FLOAT_STATE32_COUNT;

    const thread_state_flavor_t kDebugStateFlavor =
        Is64Bit() ? x86_DEBUG_STATE64 : x86_DEBUG_STATE32;
    mach_msg_type_number_t debug_state_count =
        Is64Bit() ? x86_DEBUG_STATE64_COUNT : x86_DEBUG_STATE32_COUNT;
#elif defined(ARCH_CPU_ARM64)
    const thread_state_flavor_t kThreadStateFlavor = ARM_THREAD_STATE64;
    mach_msg_type_number_t thread_state_count = ARM_THREAD_STATE64_COUNT;

    const thread_state_flavor_t kFloatStateFlavor = ARM_NEON_STATE64;
    mach_msg_type_number_t float_state_count = ARM_NEON_STATE64_COUNT;

    const thread_state_flavor_t kDebugStateFlavor = ARM_DEBUG_STATE64;
    mach_msg_type_number_t debug_state_count = ARM_DEBUG_STATE64_COUNT;
#endif

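    // Capture the general-purpose, floating-point, and debug register state.
    // If any flavor can’t be read, skip the thread rather than recording a
    // partial context.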
    kr = thread_get_state(
        thread.port,
        kThreadStateFlavor,
        reinterpret_cast<thread_state_t>(&thread.thread_context),
        &thread_state_count);
    if (kr != KERN_SUCCESS) {
      MACH_LOG(ERROR, kr) << "thread_get_state(" << kThreadStateFlavor << ")";
      continue;
    }

    kr = thread_get_state(
        thread.port,
        kFloatStateFlavor,
        reinterpret_cast<thread_state_t>(&thread.float_context),
        &float_state_count);
    if (kr != KERN_SUCCESS) {
      MACH_LOG(ERROR, kr) << "thread_get_state(" << kFloatStateFlavor << ")";
      continue;
    }

    kr = thread_get_state(
        thread.port,
        kDebugStateFlavor,
        reinterpret_cast<thread_state_t>(&thread.debug_context),
        &debug_state_count);
    if (kr != KERN_SUCCESS) {
      MACH_LOG(ERROR, kr) << "thread_get_state(" << kDebugStateFlavor << ")";
      continue;
    }

    thread_basic_info basic_info;
    mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
    kr = thread_info(thread.port,
                     THREAD_BASIC_INFO,
                     reinterpret_cast<thread_info_t>(&basic_info),
                     &count);
    if (kr != KERN_SUCCESS) {
      MACH_LOG(WARNING, kr) << "thread_info(THREAD_BASIC_INFO)";
    } else {
      thread.suspend_count = basic_info.suspend_count;
    }

    thread_identifier_info identifier_info;
    count = THREAD_IDENTIFIER_INFO_COUNT;
    kr = thread_info(thread.port,
                     THREAD_IDENTIFIER_INFO,
                     reinterpret_cast<thread_info_t>(&identifier_info),
                     &count);
    if (kr != KERN_SUCCESS) {
      MACH_LOG(WARNING, kr) << "thread_info(THREAD_IDENTIFIER_INFO)";
    } else {
      thread.id = identifier_info.thread_id;

      // thread_identifier_info::thread_handle contains the base of the
      // thread-specific data area, which on x86 and x86_64 is the thread’s base
      // address of the %gs segment. 10.9.2 xnu-2422.90.20/osfmk/kern/thread.c
      // thread_info_internal() gets the value from
      // machine_thread::cthread_self, which is the same value used to set the
      // %gs base in xnu-2422.90.20/osfmk/i386/pcb_native.c
      // act_machine_switch_pcb().
      //
      // This address is the internal pthread’s _pthread::tsd[], an array of
      // void* values that can be indexed by pthread_key_t values.
      thread.thread_specific_data_address = identifier_info.thread_handle;
    }

    thread_precedence_policy precedence;
    count = THREAD_PRECEDENCE_POLICY_COUNT;
    boolean_t get_default = FALSE;
    kr = thread_policy_get(thread.port,
                           THREAD_PRECEDENCE_POLICY,
                           reinterpret_cast<thread_policy_t>(&precedence),
                           &count,
                           &get_default);
    if (kr != KERN_SUCCESS) {
      MACH_LOG(INFO, kr) << "thread_policy_get";
    } else {
      thread.priority = precedence.importance;
    }

#if defined(ARCH_CPU_X86_FAMILY)
    mach_vm_address_t stack_pointer = Is64Bit()
                                          ? thread.thread_context.t64.__rsp
                                          : thread.thread_context.t32.__esp;
#elif defined(ARCH_CPU_ARM64)
    mach_vm_address_t stack_pointer =
        arm_thread_state64_get_sp(thread.thread_context);
#endif

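    // Record the extent of the memory region that backs this thread’s stack.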
    thread.stack_region_address =
        CalculateStackRegion(stack_pointer, &thread.stack_region_size);

    threads_.push_back(thread);
  }

  threads_need_owners.Disarm();
}

void ProcessReaderMac::InitializeModules() {
  DCHECK(!initialized_modules_);
  DCHECK(modules_.empty());

  initialized_modules_ = true;

  mach_vm_size_t all_image_info_size;
  mach_vm_address_t all_image_info_addr =
      DyldAllImageInfo(&all_image_info_size);

  process_types::dyld_all_image_infos all_image_infos;
  if (!all_image_infos.Read(this, all_image_info_addr)) {
    LOG(WARNING) << "could not read dyld_all_image_infos";
    return;
  }

  if (all_image_infos.version < 1) {
    LOG(WARNING) << "unexpected dyld_all_image_infos version "
                 << all_image_infos.version;
    return;
  }

  size_t expected_size =
      process_types::dyld_all_image_infos::ExpectedSizeForVersion(
          this, all_image_infos.version);
  if (all_image_info_size < expected_size) {
    LOG(WARNING) << "small dyld_all_image_infos size " << all_image_info_size
                 << " < " << expected_size << " for version "
                 << all_image_infos.version;
    return;
  }

  // Note that all_image_infos.infoArrayCount may be 0 if a crash occurred while
  // dyld was loading the executable. This can happen if a required dynamic
  // library was not found. Similarly, all_image_infos.infoArray may be nullptr
  // if a crash occurred while dyld was updating it.
  //
  // TODO(mark): It may be possible to recover from these situations by looking
  // through memory mappings for Mach-O images.
  //
  // Continue along when this situation is detected, because even without any
  // images in infoArray, dyldImageLoadAddress may be set, and it may be
  // possible to recover some information from dyld.
  if (all_image_infos.infoArrayCount == 0) {
    LOG(WARNING) << "all_image_infos.infoArrayCount is zero";
  } else if (!all_image_infos.infoArray) {
    LOG(WARNING) << "all_image_infos.infoArray is nullptr";
  }

  std::vector<process_types::dyld_image_info> image_info_vector(
      all_image_infos.infoArrayCount);
  if (!process_types::dyld_image_info::ReadArrayInto(this,
                                                     all_image_infos.infoArray,
                                                     image_info_vector.size(),
                                                     &image_info_vector[0])) {
    LOG(WARNING) << "could not read dyld_image_info array";
    return;
  }

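  // Walk dyld’s image list, creating a Module (and, when possible, a
  // MachOImageReader) for each entry.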
  size_t main_executable_count = 0;
  bool found_dyld = false;
  modules_.reserve(image_info_vector.size());
  for (const process_types::dyld_image_info& image_info : image_info_vector) {
    Module module;
    module.timestamp = image_info.imageFileModDate;

    if (!process_memory_.ReadCString(image_info.imageFilePath, &module.name)) {
      LOG(WARNING) << "could not read dyld_image_info::imageFilePath";
      // Proceed anyway with an empty module name.
    }

    std::unique_ptr<MachOImageReader> reader(new MachOImageReader());
    if (!reader->Initialize(this, image_info.imageLoadAddress, module.name)) {
      reader.reset();
    }

    module.reader = reader.get();

    uint32_t file_type = reader ? reader->FileType() : 0;

    module_readers_.push_back(std::move(reader));
    modules_.push_back(module);

    if (all_image_infos.version >= 2 && all_image_infos.dyldImageLoadAddress &&
        image_info.imageLoadAddress == all_image_infos.dyldImageLoadAddress) {
      found_dyld = true;
      LOG(WARNING) << base::StringPrintf(
          "found dylinker (%s) in dyld_all_image_infos::infoArray",
          module.name.c_str());

      LOG_IF(WARNING, file_type != MH_DYLINKER)
          << base::StringPrintf("dylinker (%s) has unexpected Mach-O type %d",
                                module.name.c_str(),
                                file_type);
    }

    if (file_type == MH_EXECUTE) {
      // On Mac OS X 10.6, the main executable does not normally show up at
      // index 0. This is because of how 10.6.8 dyld-132.13/src/dyld.cpp
      // notifyGDB(), the function responsible for causing
      // dyld_all_image_infos::infoArray to be updated, is called. It is
      // registered to be called when all dependents of an image have been
      // mapped (dyld_image_state_dependents_mapped), meaning that the main
      // executable won’t be added to the list until all of the libraries it
      // depends on are, even though dyld begins looking at the main executable
      // first. This changed in later versions of dyld, including those present
      // in 10.7. 10.9.4 dyld-239.4/src/dyld.cpp updateAllImages() (renamed from
      // notifyGDB()) is registered to be called when an image itself has been
      // mapped (dyld_image_state_mapped), regardless of the libraries that it
      // depends on.
      //
      // The interface requires that the main executable be first in the list,
      // so swap it into the right position.
      size_t index = modules_.size() - 1;
      if (main_executable_count == 0) {
        std::swap(modules_[0], modules_[index]);
      } else {
        LOG(WARNING) << base::StringPrintf(
            "multiple MH_EXECUTE modules (%s, %s)",
            modules_[0].name.c_str(),
            modules_[index].name.c_str());
      }
      ++main_executable_count;
    }
  }

  LOG_IF(WARNING, main_executable_count == 0) << "no MH_EXECUTE modules";

  // all_image_infos.infoArray doesn’t include an entry for dyld, but dyld is
  // loaded into the process’ address space as a module. Its load address is
  // easily known given a sufficiently recent all_image_infos.version, but the
  // timestamp and pathname are not given as they are for other modules.
  //
  // The timestamp is a lost cause, because the kernel doesn’t record the
  // timestamp of the dynamic linker at the time it’s loaded in the same way
  // that dyld records the timestamps of other modules when they’re loaded. (The
  // timestamp for the main executable is also not reported and appears as 0
  // even when accessed via dyld APIs, because it’s loaded by the kernel, not by
  // dyld.)
  //
  // The name can be determined, but it’s not as simple as hardcoding the
  // default "/usr/lib/dyld" because an executable could have specified anything
  // in its LC_LOAD_DYLINKER command.
  if (!found_dyld && all_image_infos.version >= 2 &&
      all_image_infos.dyldImageLoadAddress) {
    Module module;
    module.timestamp = 0;

    // Examine the executable’s LC_LOAD_DYLINKER load command to find the path
    // used to load dyld.
    if (all_image_infos.infoArrayCount >= 1 && main_executable_count >= 1) {
      module.name = modules_[0].reader->DylinkerName();
    }
    std::string module_name = !module.name.empty() ? module.name : "(dyld)";

    std::unique_ptr<MachOImageReader> reader(new MachOImageReader());
    if (!reader->Initialize(
            this, all_image_infos.dyldImageLoadAddress, module_name)) {
      reader.reset();
    }

    module.reader = reader.get();

    uint32_t file_type = reader ? reader->FileType() : 0;

    LOG_IF(WARNING, file_type != MH_DYLINKER)
        << base::StringPrintf("dylinker (%s) has unexpected Mach-O type %d",
                              module.name.c_str(),
                              file_type);

    if (module.name.empty() && file_type == MH_DYLINKER) {
      // Look inside dyld directly to find its preferred path.
      module.name = reader->DylinkerName();
    }

    if (module.name.empty()) {
      module.name = "(dyld)";
    }

    // dyld is loaded in the process even if its path can’t be determined.
    module_readers_.push_back(std::move(reader));
    modules_.push_back(module);
  }
}

mach_vm_address_t ProcessReaderMac::CalculateStackRegion(
    mach_vm_address_t stack_pointer,
    mach_vm_size_t* stack_region_size) {
  INITIALIZATION_STATE_DCHECK_VALID(initialized_);

  // For pthreads, it may be possible to compute the stack region based on the
  // internal _pthread::stackaddr and _pthread::stacksize. The _pthread struct
  // for a thread can be located at TSD slot 0, or the known offsets of
  // stackaddr and stacksize from the TSD area could be used.
  mach_vm_address_t region_base = stack_pointer;
  mach_vm_size_t region_size;
  natural_t depth = 0;
  vm_prot_t protection;
  unsigned int user_tag;
  kern_return_t kr = MachVMRegionRecurseDeepest(
      task_, &region_base, &region_size, &depth, &protection, &user_tag);
  if (kr != KERN_SUCCESS) {
    MACH_LOG(INFO, kr) << "mach_vm_region_recurse";
    *stack_region_size = 0;
    return 0;
  }

  if (region_base > stack_pointer) {
    // There’s nothing mapped at the stack pointer’s address. Something may have
    // trashed the stack pointer. Note that this shouldn’t happen for a normal
    // stack guard region violation because the guard region is mapped but has
    // VM_PROT_NONE protection.
    *stack_region_size = 0;
    return 0;
  }

  mach_vm_address_t start_address = stack_pointer;

  if ((protection & VM_PROT_READ) == 0) {
    // If the region isn’t readable, the stack pointer probably points to the
    // guard region. Don’t include it as part of the stack, and don’t include
    // anything at any lower memory address. The code below may still possibly
    // find the real stack region at a memory address higher than this region.
    start_address = region_base + region_size;
  } else {
    // If the ABI requires a red zone, adjust the region to include it if
    // possible.
    LocateRedZone(&start_address, &region_base, &region_size, user_tag);

    // Regardless of whether the ABI requires a red zone, capture up to
    // kExtraCaptureSize additional bytes of stack, but only if present in the
    // region that was already found.
    constexpr mach_vm_size_t kExtraCaptureSize = 128;
    start_address = std::max(start_address >= kExtraCaptureSize
                                 ? start_address - kExtraCaptureSize
                                 : start_address,
                             region_base);

    // Align start_address to a 16-byte boundary, which can help readers by
    // ensuring that data is aligned properly. This could page-align instead,
    // but that might be wasteful.
    constexpr mach_vm_size_t kDesiredAlignment = 16;
    start_address &= ~(kDesiredAlignment - 1);
    DCHECK_GE(start_address, region_base);
  }

  region_size -= (start_address - region_base);
  region_base = start_address;

  mach_vm_size_t total_region_size = region_size;

  // The stack region may have gotten split up into multiple abutting regions.
  // Try to coalesce them. This frequently happens for the main thread’s stack
  // when setrlimit(RLIMIT_STACK, …) is called. It may also happen if a region
  // is split up due to an mprotect() or vm_protect() call.
  //
  // Stack regions created by the kernel and the pthreads library will be marked
  // with the VM_MEMORY_STACK user tag. Scanning for multiple adjacent regions
  // with the same tag should find an entire stack region. Checking that the
  // protection on individual regions is not VM_PROT_NONE should guarantee that
  // this algorithm doesn’t collect map entries belonging to another thread’s
  // stack: well-behaved stacks (such as those created by the kernel and the
  // pthreads library) have VM_PROT_NONE guard regions at their low-address
  // ends.
  //
  // Other stack regions may not be so well-behaved and thus if user_tag is not
  // VM_MEMORY_STACK, the single region that was found is used as-is without
  // trying to merge it with other adjacent regions.
  if (user_tag == VM_MEMORY_STACK) {
    mach_vm_address_t try_address = region_base;
    mach_vm_address_t original_try_address;

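    // Probe the region that begins where the previous one ended. The loop
    // continues only while each newly probed region starts exactly at the
    // requested address (so it abuts the previous region), is readable, and
    // carries the VM_MEMORY_STACK user tag.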
    while (try_address += region_size,
           original_try_address = try_address,
           (kr = MachVMRegionRecurseDeepest(task_,
                                            &try_address,
                                            &region_size,
                                            &depth,
                                            &protection,
                                            &user_tag)) == KERN_SUCCESS &&
               try_address == original_try_address &&
               (protection & VM_PROT_READ) != 0 &&
               user_tag == VM_MEMORY_STACK) {
      total_region_size += region_size;
    }

    if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
      // Tolerate KERN_INVALID_ADDRESS because it will be returned when there
      // are no more regions in the map at or above the specified |try_address|.
      MACH_LOG(INFO, kr) << "mach_vm_region_recurse";
    }
  }

  *stack_region_size = total_region_size;
  return region_base;
}

void ProcessReaderMac::LocateRedZone(mach_vm_address_t* const start_address,
                                     mach_vm_address_t* const region_base,
                                     mach_vm_address_t* const region_size,
                                     const unsigned int user_tag) {
#if defined(ARCH_CPU_X86_FAMILY)
  if (Is64Bit()) {
    // x86_64 has a red zone. See AMD64 ABI 0.99.8,
    // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-r252.pdf#page=19,
    // section 3.2.2, “The Stack Frame”.
    constexpr mach_vm_size_t kRedZoneSize = 128;
    mach_vm_address_t red_zone_base =
        *start_address >= kRedZoneSize ? *start_address - kRedZoneSize : 0;
    bool red_zone_ok = false;
    if (red_zone_base >= *region_base) {
      // The red zone is within the region already discovered.
      red_zone_ok = true;
    } else if (red_zone_base < *region_base && user_tag == VM_MEMORY_STACK) {
      // Probe to see if there’s a region immediately below the one already
      // discovered.
      mach_vm_address_t red_zone_region_base = red_zone_base;
      mach_vm_size_t red_zone_region_size;
      natural_t red_zone_depth = 0;
      vm_prot_t red_zone_protection;
      unsigned int red_zone_user_tag;
      kern_return_t kr = MachVMRegionRecurseDeepest(task_,
                                                    &red_zone_region_base,
                                                    &red_zone_region_size,
                                                    &red_zone_depth,
                                                    &red_zone_protection,
                                                    &red_zone_user_tag);
      if (kr != KERN_SUCCESS) {
        MACH_LOG(INFO, kr) << "mach_vm_region_recurse";
        *start_address = *region_base;
      } else if (red_zone_region_base + red_zone_region_size == *region_base &&
                 (red_zone_protection & VM_PROT_READ) != 0 &&
                 red_zone_user_tag == user_tag) {
        // The region containing the red zone is immediately below the region
        // already found, it’s readable (not the guard region), and it has the
        // same user tag as the region already found, so merge them.
        red_zone_ok = true;
        *region_base -= red_zone_region_size;
        *region_size += red_zone_region_size;
      }
    }

    if (red_zone_ok) {
      // Begin capturing from the base of the red zone (but not the entire
      // region that encompasses the red zone).
      *start_address = red_zone_base;
    } else {
      // The red zone would go lower into another region in memory, but no
      // region was found. Memory can only be captured to an address as low as
      // the base address of the region already found.
      *start_address = *region_base;
    }
  }
#endif
}

}  // namespace crashpad