// Copyright 2014 The Crashpad Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #ifndef CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_READER_H_
16 #define CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_READER_H_
17 
18 #include <mach/mach.h>
19 #include <stdint.h>
20 #include <sys/types.h>
21 
22 #include <map>
23 #include <memory>
24 #include <string>
25 #include <vector>
26 
27 #include "base/macros.h"
28 #include "snapshot/mac/process_types.h"
29 #include "util/misc/initialization_state_dcheck.h"
30 #include "util/misc/uuid.h"
31 
32 namespace crashpad {
33 
34 class MachOImageSegmentReader;
35 class MachOImageSymbolTableReader;
36 class ProcessReaderMac;
37 
38 //! \brief A reader for Mach-O images mapped into another process.
39 //!
40 //! This class is capable of reading both 32-bit (`mach_header`/`MH_MAGIC`) and
41 //! 64-bit (`mach_header_64`/`MH_MAGIC_64`) images based on the bitness of the
42 //! remote process.
43 //!
44 //! \sa MachOImageAnnotationsReader
45 class MachOImageReader {
46  public:
47   MachOImageReader();
48   ~MachOImageReader();
49 
50   //! \brief Reads the Mach-O image file’s load commands from another process.
51   //!
52   //! This method must only be called once on an object. This method must be
53   //! called successfully before any other method in this class may be called.
54   //!
55   //! \param[in] process_reader The reader for the remote process.
56   //! \param[in] address The address, in the remote process’ address space,
57   //!     where the `mach_header` or `mach_header_64` at the beginning of the
58   //!     image to be read is located. This address can be determined by reading
59   //!     the remote process’ dyld information (see
60   //!     snapshot/mac/process_types/dyld_images.proctype).
61   //! \param[in] name The module’s name, a string to be used in logged messages.
62   //!     This string is for diagnostic purposes and to relax otherwise strict
63   //!     parsing rules for common modules with known defects.
64   //!
65   //! \return `true` if the image was read successfully, including all load
66   //!     commands. `false` otherwise, with an appropriate message logged.
67   bool Initialize(ProcessReaderMac* process_reader,
68                   mach_vm_address_t address,
69                   const std::string& name);
70 
71   //! \brief Returns the Mach-O file type.
72   //!
73   //! This value comes from the `filetype` field of the `mach_header` or
74   //! `mach_header_64`. Common values include `MH_EXECUTE`, `MH_DYLIB`,
75   //! `MH_DYLINKER`, and `MH_BUNDLE`.
FileType()76   uint32_t FileType() const { return file_type_; }
77 
78   //! \brief Returns the Mach-O image’s load address.
79   //!
80   //! This is the value passed as \a address to Initialize().
Address()81   mach_vm_address_t Address() const { return address_; }
82 
83   //! \brief Returns the mapped size of the Mach-O image’s `__TEXT` segment.
84   //!
85   //! Note that this is returns only the size of the `__TEXT` segment, not of
86   //! any other segment. This is because the interface only allows one load
87   //! address and size to be reported, but Mach-O image files may consist of
88   //! multiple discontiguous segments. By convention, the `__TEXT` segment is
89   //! always mapped at the beginning of a Mach-O image file, and it is the most
90   //! useful for the expected intended purpose of collecting data to obtain
91   //! stack backtraces. The implementation insists during initialization that
92   //! the `__TEXT` segment be mapped at the beginning of the file.
93   //!
94   //! In practice, discontiguous segments are only found for images that have
95   //! loaded out of the dyld shared cache, but the `__TEXT` segment’s size is
96   //! returned for modules that loaded with contiguous segments as well for
97   //! consistency.
Size()98   mach_vm_size_t Size() const { return size_; }
99 
100   //! \brief Returns the Mach-O image’s “slide,” the difference between its
101   //!     actual load address and its preferred load address.
102   //!
103   //! “Slide” is computed by subtracting the `__TEXT` segment’s preferred load
104   //! address from its actual load address. It will be reported as a positive
105   //! offset when the actual load address is greater than the preferred load
106   //! address. The preferred load address is taken to be the segment’s reported
107   //! `vmaddr` value.
Slide()108   mach_vm_size_t Slide() const { return slide_; }
109 
110   //! \brief Obtain segment information by segment name.
111   //!
112   //! \param[in] segment_name The name of the segment to search for, for
113   //!     example, `"__TEXT"`.
114   //!
115   //! \return A pointer to the segment information if it was found, or `nullptr`
116   //!     if it was not found. The caller does not take ownership; the lifetime
117   //!     of the returned object is scoped to the lifetime of this
118   //!     MachOImageReader object.
119   const MachOImageSegmentReader* GetSegmentByName(
120       const std::string& segment_name) const;
121 
122   //! \brief Obtain section information by segment and section name.
123   //!
124   //! \param[in] segment_name The name of the segment to search for, for
125   //!     example, `"__TEXT"`.
126   //! \param[in] section_name The name of the section within the segment to
127   //!     search for, for example, `"__text"`.
128   //! \param[out] address The actual address that the section was loaded at in
129   //!     memory, taking any “slide” into account if the section did not load at
130   //!     its preferred address as stored in the Mach-O image file. This
131   //!     parameter can be `nullptr`.
132   //!
133   //! \return A pointer to the section information if it was found, or `nullptr`
134   //!     if it was not found. The caller does not take ownership; the lifetime
135   //!     of the returned object is scoped to the lifetime of this
136   //!     MachOImageReader object.
137   //!
138   //! No parameter is provided for the section’s size, because it can be
139   //! obtained from the returned process_types::section::size field.
140   //!
141   //! \note The process_types::section::addr field gives the section’s preferred
142   //!     load address as stored in the Mach-O image file, and is not adjusted
143   //!     for any “slide” that may have occurred when the image was loaded. Use
144   //!     \a address to obtain the section’s actual load address.
145   const process_types::section* GetSectionByName(
146       const std::string& segment_name,
147       const std::string& section_name,
148       mach_vm_address_t* address) const;
149 
150   //! \brief Obtain section information by section index.
151   //!
152   //! \param[in] index The index of the section to return, in the order that it
153   //!     appears in the segment load commands. This is a 1-based index,
154   //!     matching the section number values used for `nlist::n_sect`.
155   //! \param[out] containing_segment The segment that contains the section.
156   //!     This parameter can be `nullptr`. The caller does not take ownership;
157   //!     the lifetime of the returned object is scoped to the lifetime of this
158   //!     MachOImageReader object.
159   //! \param[out] address The actual address that the section was loaded at in
160   //!     memory, taking any “slide” into account if the section did not load at
161   //!     its preferred address as stored in the Mach-O image file. This
162   //!     parameter can be `nullptr`.
163   //!
164   //! \return A pointer to the section information. If \a index is out of range,
165   //!     logs a warning and returns `nullptr`. The caller does not take
166   //!     ownership; the lifetime of the returned object is scoped to the
167   //!     lifetime of this MachOImageReader object.
168   //!
169   //! No parameter is provided for the section’s size, because it can be
170   //! obtained from the returned process_types::section::size field.
171   //!
172   //! \note The process_types::section::addr field gives the section’s preferred
173   //!     load address as stored in the Mach-O image file, and is not adjusted
174   //!     for any “slide” that may have occurred when the image was loaded. Use
175   //!     \a address to obtain the section’s actual load address.
176   //! \note Unlike MachOImageSegmentReader::GetSectionAtIndex(), this method
177   //!     accepts out-of-range values for \a index, and returns `nullptr`
178   //!     instead of aborting execution upon encountering an out-of-range value.
179   //!     This is because a Mach-O image file’s symbol table refers to this
180   //!     per-module section index, and an out-of-range index in that case
181   //!     should be treated as a data error (where the data is beyond this
182   //!     code’s control) and handled non-fatally by reporting the error to the
183   //!     caller.
184   const process_types::section* GetSectionAtIndex(
185       size_t index,
186       const MachOImageSegmentReader** containing_segment,
187       mach_vm_address_t* address) const;
188 
189   //! \brief Looks up a symbol in the image’s symbol table.
190   //!
191   //! This method is capable of locating external defined symbols. Specifically,
192   //! this method can look up symbols that have these charcteristics:
193   //!  - `N_STAB` (debugging) and `N_PEXT` (private external) must not be set.
194   //!  - `N_EXT` (external) must be set.
195   //!  - The type must be `N_ABS` (absolute) or `N_SECT` (defined in section).
196   //!
197   //! `N_INDR` (indirect), `N_UNDF` (undefined), and `N_PBUD` (prebound
198   //! undefined) symbols cannot be located through this mechanism.
199   //!
200   //! \param[in] name The name of the symbol to look up, “mangled” or
201   //!     “decorated” appropriately. For example, use `"_main"` to look up the
202   //!     symbol for the C `main()` function, and use `"__Z4Funcv"` to look up
203   //!     the symbol for the C++ `Func()` function. Contrary to `dlsym()`, the
204   //!     leading underscore must not be stripped when using this interface.
205   //! \param[out] value If the lookup was successful, this will be set to the
206   //!     value of the symbol, adjusted for any “slide” as needed. The value can
207   //!     be used as an address in the remote process’ address space where the
208   //!     pointee of the symbol exists in memory.
209   //!
210   //! \return `true` if the symbol lookup was successful and the symbol was
211   //!     found. `false` otherwise, including error conditions (for which a
212   //!     warning message will be logged), modules without symbol tables, and
213   //!     symbol names not found in the symbol table.
214   //!
215   //! \note Symbol values returned via this interface are adjusted for “slide”
216   //!     as appropriate, in contrast to the underlying implementation,
217   //!     MachOImageSymbolTableReader::LookUpExternalDefinedSymbol().
218   //!
219   //! \warning Symbols that are resolved by running symbol resolvers
220   //!     (`.symbol_resolver`) are not properly handled by this interface. The
221   //!     address of the symbol resolver is returned because that’s what shows
222   //!     up in the symbol table, rather than the effective address of the
223   //!     resolved symbol as used by dyld after running the resolver. The only
224   //!     way to detect this situation would be to read the `LC_DYLD_INFO` or
225   //!     `LC_DYLD_INFO_ONLY` load command if present and looking for the
226   //!     `EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER` flag, but that would just be
227   //!     able to detect symbols with a resolver, it would not be able to
228   //!     resolve them from out-of-process, so it’s not currently done.
229   bool LookUpExternalDefinedSymbol(const std::string& name,
230                                    mach_vm_address_t* value) const;
231 
232   //! \brief Returns a Mach-O dylib image’s current version.
233   //!
234   //! This information comes from the `dylib_current_version` field of a dylib’s
235   //! `LC_ID_DYLIB` load command. For dylibs without this load command, `0` will
236   //! be returned.
237   //!
238   //! This method may only be called on Mach-O images for which FileType()
239   //! returns `MH_DYLIB`.
240   uint32_t DylibVersion() const;
241 
242   //! \brief Returns a Mach-O image’s source version.
243   //!
244   //! This information comes from a Mach-O image’s `LC_SOURCE_VERSION` load
245   //! command. For Mach-O images without this load command, `0` will be
246   //! returned.
SourceVersion()247   uint64_t SourceVersion() const { return source_version_; }
248 
249   //! \brief Returns a Mach-O image’s UUID.
250   //!
251   //! This information comes from a Mach-O image’s `LC_UUID` load command. For
252   //! Mach-O images without this load command, a zeroed-out UUID value will be
253   //! returned.
254   //
255   // UUID is a name in this scope (referring to this method), so the parameter’s
256   // type needs to be qualified with |crashpad::|.
257   void UUID(crashpad::UUID* uuid) const;
258 
259   //! \brief Returns the dynamic linker’s pathname.
260   //!
261   //! The dynamic linker is normally /usr/lib/dyld.
262   //!
263   //! For executable images (those with file type `MH_EXECUTE`), this is the
264   //! name provided in the `LC_LOAD_DYLINKER` load command, if any. For dynamic
265   //! linker images (those with file type `MH_DYLINKER`), this is the name
266   //! provided in the `LC_ID_DYLINKER` load command. In other cases, this will
267   //! be empty.
DylinkerName()268   std::string DylinkerName() const { return dylinker_name_; }
269 
270   //! \brief Obtains the module’s CrashpadInfo structure.
271   //!
272   //! \return `true` on success, `false` on failure. If the module does not have
273   //!     a `__DATA,crashpad_info` section, this will return `false` without
274   //!     logging any messages. Other failures will result in messages being
275   //!     logged.
276   bool GetCrashpadInfo(process_types::CrashpadInfo* crashpad_info) const;
277 
278  private:
279   // A generic helper routine for the other Read*Command() methods.
280   template <typename T>
281   bool ReadLoadCommand(mach_vm_address_t load_command_address,
282                        const std::string& load_command_info,
283                        uint32_t expected_load_command_id,
284                        T* load_command);
285 
286   // The Read*Command() methods are subroutines called by Initialize(). They are
287   // responsible for reading a single load command. They may update the member
288   // fields of their MachOImageReader object. If they can’t make sense of a load
289   // command, they return false.
290   bool ReadSegmentCommand(mach_vm_address_t load_command_address,
291                           const std::string& load_command_info);
292   bool ReadSymTabCommand(mach_vm_address_t load_command_address,
293                          const std::string& load_command_info);
294   bool ReadDySymTabCommand(mach_vm_address_t load_command_address,
295                            const std::string& load_command_info);
296   bool ReadIdDylibCommand(mach_vm_address_t load_command_address,
297                           const std::string& load_command_info);
298   bool ReadDylinkerCommand(mach_vm_address_t load_command_address,
299                            const std::string& load_command_info);
300   bool ReadUUIDCommand(mach_vm_address_t load_command_address,
301                        const std::string& load_command_info);
302   bool ReadSourceVersionCommand(mach_vm_address_t load_command_address,
303                                 const std::string& load_command_info);
304   bool ReadUnexpectedCommand(mach_vm_address_t load_command_address,
305                              const std::string& load_command_info);
306 
307   // Performs deferred initialization of the symbol table. Because a module’s
308   // symbol table is often not needed, this is not handled in Initialize(), but
309   // is done lazily, on-demand as needed.
310   //
311   // symbol_table_initialized_ will be transitioned to the appropriate state. If
312   // initialization completes successfully, this will be the valid state.
313   // Otherwise, it will be left in the invalid state and a warning message will
314   // be logged.
315   //
316   // Note that if the object contains no symbol table, symbol_table_initialized_
317   // will be set to the valid state, but symbol_table_ will be nullptr.
318   void InitializeSymbolTable() const;
319 
320   std::vector<std::unique_ptr<MachOImageSegmentReader>> segments_;
321   std::map<std::string, size_t> segment_map_;
322   std::string module_name_;
323   std::string module_info_;
324   std::string dylinker_name_;
325   crashpad::UUID uuid_;
326   mach_vm_address_t address_;
327   mach_vm_size_t size_;
328   mach_vm_size_t slide_;
329   uint64_t source_version_;
330   std::unique_ptr<process_types::symtab_command> symtab_command_;
331   std::unique_ptr<process_types::dysymtab_command> dysymtab_command_;
332 
333   // symbol_table_ (and symbol_table_initialized_) are mutable in order to
334   // maintain LookUpExternalDefinedSymbol() as a const interface while allowing
335   // lazy initialization via InitializeSymbolTable(). This is logical
336   // const-ness, not physical const-ness.
337   mutable std::unique_ptr<MachOImageSymbolTableReader> symbol_table_;
338 
339   std::unique_ptr<process_types::dylib_command> id_dylib_command_;
340   ProcessReaderMac* process_reader_;  // weak
341   uint32_t file_type_;
342   InitializationStateDcheck initialized_;
343 
344   // symbol_table_initialized_ protects symbol_table_: symbol_table_ can only
345   // be used when symbol_table_initialized_ is valid, although
346   // symbol_table_initialized_ being valid doesn’t imply that symbol_table_ is
347   // set. symbol_table_initialized_ will be valid without symbol_table_ being
348   // set in modules that have no symbol table.
349   mutable InitializationState symbol_table_initialized_;
350 
351   DISALLOW_COPY_AND_ASSIGN(MachOImageReader);
352 };
353 
354 }  // namespace crashpad
355 
356 #endif  // CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_READER_H_
357