1 // Copyright 2014 The Crashpad Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_READER_H_ 16 #define CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_READER_H_ 17 18 #include <mach/mach.h> 19 #include <stdint.h> 20 #include <sys/types.h> 21 22 #include <map> 23 #include <memory> 24 #include <string> 25 #include <vector> 26 27 #include "base/macros.h" 28 #include "snapshot/mac/process_types.h" 29 #include "util/misc/initialization_state_dcheck.h" 30 #include "util/misc/uuid.h" 31 32 namespace crashpad { 33 34 class MachOImageSegmentReader; 35 class MachOImageSymbolTableReader; 36 class ProcessReaderMac; 37 38 //! \brief A reader for Mach-O images mapped into another process. 39 //! 40 //! This class is capable of reading both 32-bit (`mach_header`/`MH_MAGIC`) and 41 //! 64-bit (`mach_header_64`/`MH_MAGIC_64`) images based on the bitness of the 42 //! remote process. 43 //! 44 //! \sa MachOImageAnnotationsReader 45 class MachOImageReader { 46 public: 47 MachOImageReader(); 48 ~MachOImageReader(); 49 50 //! \brief Reads the Mach-O image file’s load commands from another process. 51 //! 52 //! This method must only be called once on an object. This method must be 53 //! called successfully before any other method in this class may be called. 54 //! 55 //! \param[in] process_reader The reader for the remote process. 56 //! \param[in] address The address, in the remote process’ address space, 57 //! where the `mach_header` or `mach_header_64` at the beginning of the 58 //! image to be read is located. This address can be determined by reading 59 //! the remote process’ dyld information (see 60 //! snapshot/mac/process_types/dyld_images.proctype). 61 //! \param[in] name The module’s name, a string to be used in logged messages. 62 //! This string is for diagnostic purposes and to relax otherwise strict 63 //! parsing rules for common modules with known defects. 64 //! 65 //! \return `true` if the image was read successfully, including all load 66 //! commands. `false` otherwise, with an appropriate message logged. 67 bool Initialize(ProcessReaderMac* process_reader, 68 mach_vm_address_t address, 69 const std::string& name); 70 71 //! \brief Returns the Mach-O file type. 72 //! 73 //! This value comes from the `filetype` field of the `mach_header` or 74 //! `mach_header_64`. Common values include `MH_EXECUTE`, `MH_DYLIB`, 75 //! `MH_DYLINKER`, and `MH_BUNDLE`. FileType()76 uint32_t FileType() const { return file_type_; } 77 78 //! \brief Returns the Mach-O image’s load address. 79 //! 80 //! This is the value passed as \a address to Initialize(). Address()81 mach_vm_address_t Address() const { return address_; } 82 83 //! \brief Returns the mapped size of the Mach-O image’s `__TEXT` segment. 84 //! 85 //! Note that this is returns only the size of the `__TEXT` segment, not of 86 //! any other segment. This is because the interface only allows one load 87 //! address and size to be reported, but Mach-O image files may consist of 88 //! multiple discontiguous segments. By convention, the `__TEXT` segment is 89 //! always mapped at the beginning of a Mach-O image file, and it is the most 90 //! useful for the expected intended purpose of collecting data to obtain 91 //! stack backtraces. The implementation insists during initialization that 92 //! the `__TEXT` segment be mapped at the beginning of the file. 93 //! 94 //! In practice, discontiguous segments are only found for images that have 95 //! loaded out of the dyld shared cache, but the `__TEXT` segment’s size is 96 //! returned for modules that loaded with contiguous segments as well for 97 //! consistency. Size()98 mach_vm_size_t Size() const { return size_; } 99 100 //! \brief Returns the Mach-O image’s “slide,” the difference between its 101 //! actual load address and its preferred load address. 102 //! 103 //! “Slide” is computed by subtracting the `__TEXT` segment’s preferred load 104 //! address from its actual load address. It will be reported as a positive 105 //! offset when the actual load address is greater than the preferred load 106 //! address. The preferred load address is taken to be the segment’s reported 107 //! `vmaddr` value. Slide()108 mach_vm_size_t Slide() const { return slide_; } 109 110 //! \brief Obtain segment information by segment name. 111 //! 112 //! \param[in] segment_name The name of the segment to search for, for 113 //! example, `"__TEXT"`. 114 //! 115 //! \return A pointer to the segment information if it was found, or `nullptr` 116 //! if it was not found. The caller does not take ownership; the lifetime 117 //! of the returned object is scoped to the lifetime of this 118 //! MachOImageReader object. 119 const MachOImageSegmentReader* GetSegmentByName( 120 const std::string& segment_name) const; 121 122 //! \brief Obtain section information by segment and section name. 123 //! 124 //! \param[in] segment_name The name of the segment to search for, for 125 //! example, `"__TEXT"`. 126 //! \param[in] section_name The name of the section within the segment to 127 //! search for, for example, `"__text"`. 128 //! \param[out] address The actual address that the section was loaded at in 129 //! memory, taking any “slide” into account if the section did not load at 130 //! its preferred address as stored in the Mach-O image file. This 131 //! parameter can be `nullptr`. 132 //! 133 //! \return A pointer to the section information if it was found, or `nullptr` 134 //! if it was not found. The caller does not take ownership; the lifetime 135 //! of the returned object is scoped to the lifetime of this 136 //! MachOImageReader object. 137 //! 138 //! No parameter is provided for the section’s size, because it can be 139 //! obtained from the returned process_types::section::size field. 140 //! 141 //! \note The process_types::section::addr field gives the section’s preferred 142 //! load address as stored in the Mach-O image file, and is not adjusted 143 //! for any “slide” that may have occurred when the image was loaded. Use 144 //! \a address to obtain the section’s actual load address. 145 const process_types::section* GetSectionByName( 146 const std::string& segment_name, 147 const std::string& section_name, 148 mach_vm_address_t* address) const; 149 150 //! \brief Obtain section information by section index. 151 //! 152 //! \param[in] index The index of the section to return, in the order that it 153 //! appears in the segment load commands. This is a 1-based index, 154 //! matching the section number values used for `nlist::n_sect`. 155 //! \param[out] containing_segment The segment that contains the section. 156 //! This parameter can be `nullptr`. The caller does not take ownership; 157 //! the lifetime of the returned object is scoped to the lifetime of this 158 //! MachOImageReader object. 159 //! \param[out] address The actual address that the section was loaded at in 160 //! memory, taking any “slide” into account if the section did not load at 161 //! its preferred address as stored in the Mach-O image file. This 162 //! parameter can be `nullptr`. 163 //! 164 //! \return A pointer to the section information. If \a index is out of range, 165 //! logs a warning and returns `nullptr`. The caller does not take 166 //! ownership; the lifetime of the returned object is scoped to the 167 //! lifetime of this MachOImageReader object. 168 //! 169 //! No parameter is provided for the section’s size, because it can be 170 //! obtained from the returned process_types::section::size field. 171 //! 172 //! \note The process_types::section::addr field gives the section’s preferred 173 //! load address as stored in the Mach-O image file, and is not adjusted 174 //! for any “slide” that may have occurred when the image was loaded. Use 175 //! \a address to obtain the section’s actual load address. 176 //! \note Unlike MachOImageSegmentReader::GetSectionAtIndex(), this method 177 //! accepts out-of-range values for \a index, and returns `nullptr` 178 //! instead of aborting execution upon encountering an out-of-range value. 179 //! This is because a Mach-O image file’s symbol table refers to this 180 //! per-module section index, and an out-of-range index in that case 181 //! should be treated as a data error (where the data is beyond this 182 //! code’s control) and handled non-fatally by reporting the error to the 183 //! caller. 184 const process_types::section* GetSectionAtIndex( 185 size_t index, 186 const MachOImageSegmentReader** containing_segment, 187 mach_vm_address_t* address) const; 188 189 //! \brief Looks up a symbol in the image’s symbol table. 190 //! 191 //! This method is capable of locating external defined symbols. Specifically, 192 //! this method can look up symbols that have these charcteristics: 193 //! - `N_STAB` (debugging) and `N_PEXT` (private external) must not be set. 194 //! - `N_EXT` (external) must be set. 195 //! - The type must be `N_ABS` (absolute) or `N_SECT` (defined in section). 196 //! 197 //! `N_INDR` (indirect), `N_UNDF` (undefined), and `N_PBUD` (prebound 198 //! undefined) symbols cannot be located through this mechanism. 199 //! 200 //! \param[in] name The name of the symbol to look up, “mangled” or 201 //! “decorated” appropriately. For example, use `"_main"` to look up the 202 //! symbol for the C `main()` function, and use `"__Z4Funcv"` to look up 203 //! the symbol for the C++ `Func()` function. Contrary to `dlsym()`, the 204 //! leading underscore must not be stripped when using this interface. 205 //! \param[out] value If the lookup was successful, this will be set to the 206 //! value of the symbol, adjusted for any “slide” as needed. The value can 207 //! be used as an address in the remote process’ address space where the 208 //! pointee of the symbol exists in memory. 209 //! 210 //! \return `true` if the symbol lookup was successful and the symbol was 211 //! found. `false` otherwise, including error conditions (for which a 212 //! warning message will be logged), modules without symbol tables, and 213 //! symbol names not found in the symbol table. 214 //! 215 //! \note Symbol values returned via this interface are adjusted for “slide” 216 //! as appropriate, in contrast to the underlying implementation, 217 //! MachOImageSymbolTableReader::LookUpExternalDefinedSymbol(). 218 //! 219 //! \warning Symbols that are resolved by running symbol resolvers 220 //! (`.symbol_resolver`) are not properly handled by this interface. The 221 //! address of the symbol resolver is returned because that’s what shows 222 //! up in the symbol table, rather than the effective address of the 223 //! resolved symbol as used by dyld after running the resolver. The only 224 //! way to detect this situation would be to read the `LC_DYLD_INFO` or 225 //! `LC_DYLD_INFO_ONLY` load command if present and looking for the 226 //! `EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER` flag, but that would just be 227 //! able to detect symbols with a resolver, it would not be able to 228 //! resolve them from out-of-process, so it’s not currently done. 229 bool LookUpExternalDefinedSymbol(const std::string& name, 230 mach_vm_address_t* value) const; 231 232 //! \brief Returns a Mach-O dylib image’s current version. 233 //! 234 //! This information comes from the `dylib_current_version` field of a dylib’s 235 //! `LC_ID_DYLIB` load command. For dylibs without this load command, `0` will 236 //! be returned. 237 //! 238 //! This method may only be called on Mach-O images for which FileType() 239 //! returns `MH_DYLIB`. 240 uint32_t DylibVersion() const; 241 242 //! \brief Returns a Mach-O image’s source version. 243 //! 244 //! This information comes from a Mach-O image’s `LC_SOURCE_VERSION` load 245 //! command. For Mach-O images without this load command, `0` will be 246 //! returned. SourceVersion()247 uint64_t SourceVersion() const { return source_version_; } 248 249 //! \brief Returns a Mach-O image’s UUID. 250 //! 251 //! This information comes from a Mach-O image’s `LC_UUID` load command. For 252 //! Mach-O images without this load command, a zeroed-out UUID value will be 253 //! returned. 254 // 255 // UUID is a name in this scope (referring to this method), so the parameter’s 256 // type needs to be qualified with |crashpad::|. 257 void UUID(crashpad::UUID* uuid) const; 258 259 //! \brief Returns the dynamic linker’s pathname. 260 //! 261 //! The dynamic linker is normally /usr/lib/dyld. 262 //! 263 //! For executable images (those with file type `MH_EXECUTE`), this is the 264 //! name provided in the `LC_LOAD_DYLINKER` load command, if any. For dynamic 265 //! linker images (those with file type `MH_DYLINKER`), this is the name 266 //! provided in the `LC_ID_DYLINKER` load command. In other cases, this will 267 //! be empty. DylinkerName()268 std::string DylinkerName() const { return dylinker_name_; } 269 270 //! \brief Obtains the module’s CrashpadInfo structure. 271 //! 272 //! \return `true` on success, `false` on failure. If the module does not have 273 //! a `__DATA,crashpad_info` section, this will return `false` without 274 //! logging any messages. Other failures will result in messages being 275 //! logged. 276 bool GetCrashpadInfo(process_types::CrashpadInfo* crashpad_info) const; 277 278 private: 279 // A generic helper routine for the other Read*Command() methods. 280 template <typename T> 281 bool ReadLoadCommand(mach_vm_address_t load_command_address, 282 const std::string& load_command_info, 283 uint32_t expected_load_command_id, 284 T* load_command); 285 286 // The Read*Command() methods are subroutines called by Initialize(). They are 287 // responsible for reading a single load command. They may update the member 288 // fields of their MachOImageReader object. If they can’t make sense of a load 289 // command, they return false. 290 bool ReadSegmentCommand(mach_vm_address_t load_command_address, 291 const std::string& load_command_info); 292 bool ReadSymTabCommand(mach_vm_address_t load_command_address, 293 const std::string& load_command_info); 294 bool ReadDySymTabCommand(mach_vm_address_t load_command_address, 295 const std::string& load_command_info); 296 bool ReadIdDylibCommand(mach_vm_address_t load_command_address, 297 const std::string& load_command_info); 298 bool ReadDylinkerCommand(mach_vm_address_t load_command_address, 299 const std::string& load_command_info); 300 bool ReadUUIDCommand(mach_vm_address_t load_command_address, 301 const std::string& load_command_info); 302 bool ReadSourceVersionCommand(mach_vm_address_t load_command_address, 303 const std::string& load_command_info); 304 bool ReadUnexpectedCommand(mach_vm_address_t load_command_address, 305 const std::string& load_command_info); 306 307 // Performs deferred initialization of the symbol table. Because a module’s 308 // symbol table is often not needed, this is not handled in Initialize(), but 309 // is done lazily, on-demand as needed. 310 // 311 // symbol_table_initialized_ will be transitioned to the appropriate state. If 312 // initialization completes successfully, this will be the valid state. 313 // Otherwise, it will be left in the invalid state and a warning message will 314 // be logged. 315 // 316 // Note that if the object contains no symbol table, symbol_table_initialized_ 317 // will be set to the valid state, but symbol_table_ will be nullptr. 318 void InitializeSymbolTable() const; 319 320 std::vector<std::unique_ptr<MachOImageSegmentReader>> segments_; 321 std::map<std::string, size_t> segment_map_; 322 std::string module_name_; 323 std::string module_info_; 324 std::string dylinker_name_; 325 crashpad::UUID uuid_; 326 mach_vm_address_t address_; 327 mach_vm_size_t size_; 328 mach_vm_size_t slide_; 329 uint64_t source_version_; 330 std::unique_ptr<process_types::symtab_command> symtab_command_; 331 std::unique_ptr<process_types::dysymtab_command> dysymtab_command_; 332 333 // symbol_table_ (and symbol_table_initialized_) are mutable in order to 334 // maintain LookUpExternalDefinedSymbol() as a const interface while allowing 335 // lazy initialization via InitializeSymbolTable(). This is logical 336 // const-ness, not physical const-ness. 337 mutable std::unique_ptr<MachOImageSymbolTableReader> symbol_table_; 338 339 std::unique_ptr<process_types::dylib_command> id_dylib_command_; 340 ProcessReaderMac* process_reader_; // weak 341 uint32_t file_type_; 342 InitializationStateDcheck initialized_; 343 344 // symbol_table_initialized_ protects symbol_table_: symbol_table_ can only 345 // be used when symbol_table_initialized_ is valid, although 346 // symbol_table_initialized_ being valid doesn’t imply that symbol_table_ is 347 // set. symbol_table_initialized_ will be valid without symbol_table_ being 348 // set in modules that have no symbol table. 349 mutable InitializationState symbol_table_initialized_; 350 351 DISALLOW_COPY_AND_ASSIGN(MachOImageReader); 352 }; 353 354 } // namespace crashpad 355 356 #endif // CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_READER_H_ 357