// Copyright 2014 The Crashpad Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #ifndef CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_SEGMENT_READER_H_
16 #define CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_SEGMENT_READER_H_
17 
18 #include <mach/mach.h>
19 #include <stdint.h>
20 #include <sys/types.h>
21 
22 #include <map>
23 #include <string>
24 #include <vector>
25 
26 #include "base/macros.h"
27 #include "snapshot/mac/process_types.h"
28 #include "util/misc/initialization_state_dcheck.h"
29 
30 namespace crashpad {
31 
//! \brief Determines whether a module appears to be a malformed OpenCL
//!     `cl_kernels` module based on its name and Mach-O file type.
//!
//! `cl_kernels` modules require special handling because they’re malformed on
//! OS X 10.10 and later. A `cl_kernels` module always has Mach-O type
//! `MH_BUNDLE` and is named `"cl_kernels"` until macOS 10.14, and
//! `"/private/var/db/CVMS/cvmsCodeSignObj"` plus 16 random characters on macOS
//! 10.14.
//!
//! Malformed `cl_kernels` modules have a single `__TEXT` segment, but one of
//! the sections within it claims to belong to the `__LD` segment. This mismatch
//! shouldn’t happen. This errant section also has the `S_ATTR_DEBUG` flag set,
//! which shouldn’t happen unless all of the other sections in the segment also
//! have this bit set (they don’t). These odd sections are reminiscent of unwind
//! information stored in `MH_OBJECT` images, although `cl_kernels` images claim
//! to be `MH_BUNDLE`.
//!
//! This function is exposed for testing purposes only.
//!
//! \param[in] mach_o_file_type The Mach-O type of the module being examined.
//! \param[in] module_name The pathname that `dyld` reported having loaded the
//!     module from.
//! \param[out] has_timestamp Optional, may be `nullptr`. If provided, and the
//!     module is a malformed `cl_kernels` module, this will be set to `true` if
//!     the module was loaded from the filesystem (as is the case when loaded
//!     from the CVMS directory) and is expected to have a timestamp, and
//!     `false` otherwise. Note that even when loaded from the filesystem, these
//!     modules are unlinked from the filesystem after loading.
//!
//! \return `true` if the module appears to be a malformed `cl_kernels` module
//!     based on the provided information, `false` otherwise.
bool IsMalformedCLKernelsModule(uint32_t mach_o_file_type,
                                const std::string& module_name,
                                bool* has_timestamp);
66 
67 //! \brief A reader for `LC_SEGMENT` or `LC_SEGMENT_64` load commands in Mach-O
68 //!     images mapped into another process.
69 //!
70 //! This class is capable of reading both `LC_SEGMENT` and `LC_SEGMENT_64` based
71 //! on the bitness of the remote process.
72 //!
73 //! A MachOImageSegmentReader will normally be instantiated by a
74 //! MachOImageReader.
75 class MachOImageSegmentReader {
76  public:
77   MachOImageSegmentReader();
78   ~MachOImageSegmentReader();
79 
80   //! \brief Reads the segment load command from another process.
81   //!
82   //! This method must only be called once on an object. This method must be
83   //! called successfully before any other method in this class may be called.
84   //!
85   //! \param[in] process_reader The reader for the remote process.
86   //! \param[in] load_command_address The address, in the remote process’
87   //!     address space, where the `LC_SEGMENT` or `LC_SEGMENT_64` load command
88   //!     to be read is located. This address is determined by a Mach-O image
89   //!     reader, such as MachOImageReader, as it walks Mach-O load commands.
90   //! \param[in] load_command_info A string to be used in logged messages. This
91   //!     string is for diagnostic purposes only, and may be empty.
92   //! \param[in] module_name The path used to load the module. This string is
93   //!     used to relax otherwise strict parsing rules for common modules with
94   //!     known defects.
95   //! \param[in] file_type The module’s Mach-O file type. This is used to relax
96   //!     otherwise strict parsing rules for common modules with known defects.
97   //!
98   //! \return `true` if the load command was read successfully. `false`
99   //!     otherwise, with an appropriate message logged.
100   bool Initialize(ProcessReaderMac* process_reader,
101                   mach_vm_address_t load_command_address,
102                   const std::string& load_command_info,
103                   const std::string& module_name,
104                   uint32_t file_type);
105 
106   //! \brief Sets the image’s slide value.
107   //!
108   //! This method must only be called once on an object, after Initialize() is
109   //! called successfully. It must be called before Address(), Size(),
110   //! GetSectionByName(), or GetSectionAtIndex() can be called.
111   //!
112   //! This method is provided because slide is a property of the image that
113   //! cannot be determined until at least some segments have been read. As such,
114   //! it is not necessarily known at the time that Initialize() is called.
115   void SetSlide(mach_vm_size_t slide);
116 
117   //! \brief Returns the segment’s name.
118   //!
119   //! The segment’s name is taken from the load command’s `segname` field.
120   //! Common segment names are `"__TEXT"`, `"__DATA"`, and `"__LINKEDIT"`.
121   //! Symbolic constants for these common names are defined in
122   //! `<mach-o/loader.h>`.
123   std::string Name() const;
124 
125   //! \return The segment’s actual load address in memory, adjusted for any
126   //!     “slide”.
127   //!
128   //! \note For the segment’s preferred load address, not adjusted for slide,
129   //!     use vmaddr().
130   mach_vm_address_t Address() const;
131 
132   //! \return The segment’s actual size address in memory, adjusted for any
133   //!     growth in the case of a nonsliding segment.
134   //!
135   //! \note For the segment’s preferred size, not adjusted for growth, use
136   //!     vmsize().
137   mach_vm_address_t Size() const;
138 
139   //! \brief The segment’s preferred load address.
140   //!
141   //! \return The segment’s preferred load address as stored in the Mach-O file.
142   //!
143   //! \note This value is not adjusted for any “slide” that may have occurred
144   //!     when the image was loaded. Use Address() for a value adjusted for
145   //!     slide.
146   //!
147   //! \sa MachOImageReader::GetSegmentByName()
vmaddr()148   mach_vm_address_t vmaddr() const { return segment_command_.vmaddr; }
149 
150   //! \brief Returns the segment’s size as mapped into memory.
151   //!
152   //! \note For non-sliding segments, this value is not adjusted for any growth
153   //!     that may have occurred when the image was loaded. Use Size() for a
154   //!     value adjusted for growth.
vmsize()155   mach_vm_size_t vmsize() const { return segment_command_.vmsize; }
156 
157   //! \brief Returns the file offset of the mapped segment in the file from
158   //!     which it was mapped.
159   //!
160   //! The file offset is the difference between the beginning of the
161   //! `mach_header` or `mach_header_64` and the beginning of the segment’s
162   //! mapped region. For segments that are not mapped from a file (such as
163   //! `__PAGEZERO` segments), this will be `0`.
fileoff()164   mach_vm_size_t fileoff() const { return segment_command_.fileoff; }
165 
166   //! \brief Returns the number of sections in the segment.
167   //!
168   //! This will return `0` for a segment without any sections, typical for
169   //! `__PAGEZERO` and `__LINKEDIT` segments.
170   //!
171   //! Although the Mach-O file format uses a `uint32_t` for this field, there is
172   //! an overall limit of 255 sections in an entire Mach-O image file (not just
173   //! in a single segment) imposed by the symbol table format. Symbols will not
174   //! be able to reference anything in a section beyond the first 255 in a
175   //! Mach-O image file.
nsects()176   uint32_t nsects() const { return segment_command_.nsects; }
177 
178   //! \brief Obtain section information by section name.
179   //!
180   //! \param[in] section_name The name of the section to search for, without the
181   //!     leading segment name. For example, use `"__text"`, not
182   //!     `"__TEXT,__text"` or `"__TEXT.__text"`.
183   //! \param[out] address The actual address that the section was loaded at in
184   //!     memory, taking any “slide” into account if the section did not load at
185   //!     its preferred address as stored in the Mach-O image file. This
186   //!     parameter can be `nullptr`.
187   //!
188   //! \return A pointer to the section information if it was found, or `nullptr`
189   //!     if it was not found. The caller does not take ownership; the lifetime
190   //!     of the returned object is scoped to the lifetime of this
191   //!     MachOImageSegmentReader object.
192   //!
193   //! \note The process_types::section::addr field gives the section’s preferred
194   //!     load address as stored in the Mach-O image file, and is not adjusted
195   //!     for any “slide” that may have occurred when the image was loaded.
196   //!
197   //! \sa MachOImageReader::GetSectionByName()
198   const process_types::section* GetSectionByName(
199       const std::string& section_name,
200       mach_vm_address_t* address) const;
201 
202   //! \brief Obtain section information by section index.
203   //!
204   //! \param[in] index The index of the section to return, in the order that it
205   //!     appears in the segment load command. Unlike
206   //!     MachOImageReader::GetSectionAtIndex(), this is a 0-based index. This
207   //!     parameter must be in the range of valid indices aas reported by
208   //!     nsects().
209   //! \param[out] address The actual address that the section was loaded at in
210   //!     memory, taking any “slide” into account if the section did not load at
211   //!     its preferred address as stored in the Mach-O image file. This
212   //!     parameter can be `nullptr`.
213   //!
214   //! \return A pointer to the section information. If \a index is out of range,
215   //!     execution is aborted.  The caller does not take ownership; the
216   //!     lifetime of the returned object is scoped to the lifetime of this
217   //!     MachOImageSegmentReader object.
218   //!
219   //! \note The process_types::section::addr field gives the section’s preferred
220   //!     load address as stored in the Mach-O image file, and is not adjusted
221   //!     for any “slide” that may have occurred when the image was loaded.
222   //! \note Unlike MachOImageReader::GetSectionAtIndex(), this method does not
223   //!     accept out-of-range values for \a index, and aborts execution instead
224   //!     of returning `nullptr` upon encountering an out-of-range value. This
225   //!     is because this method is expected to be used in a loop that can be
226   //!     limited to nsects() iterations, so an out-of-range error can be
227   //!     treated more harshly as a logic error, as opposed to a data error.
228   //!
229   //! \sa MachOImageReader::GetSectionAtIndex()
230   const process_types::section* GetSectionAtIndex(
231       size_t index,
232       mach_vm_address_t* address) const;
233 
234   //! Returns whether the segment slides.
235   //!
236   //! Most segments slide, but the `__PAGEZERO` segment does not, it grows
237   //! instead. This method identifies non-sliding segments in the same way that
238   //! the kernel does.
239   bool SegmentSlides() const;
240 
241   //! \brief Returns a segment name string.
242   //!
243   //! Segment names may be 16 characters long, and are not necessarily
244   //! `NUL`-terminated. This function will return a segment name based on up to
245   //! the first 16 characters found at \a segment_name_c.
246   static std::string SegmentNameString(const char* segment_name_c);
247 
248   //! \brief Returns a section name string.
249   //!
250   //! Section names may be 16 characters long, and are not necessarily
251   //! `NUL`-terminated. This function will return a section name based on up to
252   //! the first 16 characters found at \a section_name_c.
253   static std::string SectionNameString(const char* section_name_c);
254 
255   //! \brief Returns a segment and section name string.
256   //!
257   //! A segment and section name string is composed of a segment name string
258   //! (see SegmentNameString()) and a section name string (see
259   //! SectionNameString()) separated by a comma. An example is
260   //! `"__TEXT,__text"`.
261   static std::string SegmentAndSectionNameString(const char* segment_name_c,
262                                                  const char* section_name_c);
263 
264  private:
265   //! \brief The internal implementation of Name().
266   //!
267   //! This is identical to Name() but does not perform the
268   //! InitializationStateDcheck check. It may be called during initialization
269   //! provided that the caller only does so after segment_command_ has been
270   //! read successfully.
271   std::string NameInternal() const;
272 
273   // The segment command data read from the remote process.
274   process_types::segment_command segment_command_;
275 
276   // Section structures read from the remote process in the order that they are
277   // given in the remote process.
278   std::vector<process_types::section> sections_;
279 
280   // Maps section names to indices into the sections_ vector.
281   std::map<std::string, size_t> section_map_;
282 
283   // The image’s slide. Note that the segment’s slide may be 0 and not the value
284   // of the image’s slide if SegmentSlides() is false. In that case, the
285   // segment is extended instead of slid, so its size as loaded will be
286   // increased by this value.
287   mach_vm_size_t slide_;
288 
289   InitializationStateDcheck initialized_;
290   InitializationStateDcheck initialized_slide_;
291 
292   DISALLOW_COPY_AND_ASSIGN(MachOImageSegmentReader);
293 };
294 
295 }  // namespace crashpad
296 
297 #endif  // CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_SEGMENT_READER_H_
298