1 // Copyright 2014 The Crashpad Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_SEGMENT_READER_H_ 16 #define CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_SEGMENT_READER_H_ 17 18 #include <mach/mach.h> 19 #include <stdint.h> 20 #include <sys/types.h> 21 22 #include <map> 23 #include <string> 24 #include <vector> 25 26 #include "base/macros.h" 27 #include "snapshot/mac/process_types.h" 28 #include "util/misc/initialization_state_dcheck.h" 29 30 namespace crashpad { 31 32 //! \brief Determines whether a module appears to be a malformed OpenCL 33 //! `cl_kernels` module based on its name and Mach-O file type. 34 //! 35 //! `cl_kernels` modules require special handling because they’re malformed on 36 //! OS X 10.10 and later. A `cl_kernels` module always has Mach-O type 37 //! `MH_BUNDLE` and is named `"cl_kernels"` until macOS 10.14, and 38 //! `"/private/var/db/CVMS/cvmsCodeSignObj"` plus 16 random characters on macOS 39 //! 10.14. 40 //! 41 //! Malformed `cl_kernels` modules have a single `__TEXT` segment, but one of 42 //! the sections within it claims to belong to the `__LD` segment. This mismatch 43 //! shouldn’t happen. This errant section also has the `S_ATTR_DEBUG` flag set, 44 //! which shouldn’t happen unless all of the other sections in the segment also 45 //! have this bit set (they don’t). These odd sections are reminiscent of unwind 46 //! information stored in `MH_OBJECT` images, although `cl_kernels` images claim 47 //! to be `MH_BUNDLE`. 48 //! 49 //! This function is exposed for testing purposes only. 50 //! 51 //! \param[in] mach_o_file_type The Mach-O type of the module being examined. 52 //! \param[in] module_name The pathname that `dyld` reported having loaded the 53 //! module from. 54 //! \param[out] has_timestamp Optional, may be `nullptr`. If provided, and the 55 //! module is a maformed `cl_kernels` module, this will be set to `true` if 56 //! the module was loaded from the filesystem (as is the case when loaded 57 //! from the CVMS directory) and is expected to have a timestamp, and 58 //! `false` otherwise. Note that even when loaded from the filesystem, these 59 //! modules are unlinked from the filesystem after loading. 60 //! 61 //! \return `true` if the module appears to be a malformed `cl_kernels` module 62 //! based on the provided information, `false` otherwise. 63 bool IsMalformedCLKernelsModule(uint32_t mach_o_file_type, 64 const std::string& module_name, 65 bool* has_timestamp); 66 67 //! \brief A reader for `LC_SEGMENT` or `LC_SEGMENT_64` load commands in Mach-O 68 //! images mapped into another process. 69 //! 70 //! This class is capable of reading both `LC_SEGMENT` and `LC_SEGMENT_64` based 71 //! on the bitness of the remote process. 72 //! 73 //! A MachOImageSegmentReader will normally be instantiated by a 74 //! MachOImageReader. 75 class MachOImageSegmentReader { 76 public: 77 MachOImageSegmentReader(); 78 ~MachOImageSegmentReader(); 79 80 //! \brief Reads the segment load command from another process. 81 //! 82 //! This method must only be called once on an object. This method must be 83 //! called successfully before any other method in this class may be called. 84 //! 85 //! \param[in] process_reader The reader for the remote process. 86 //! \param[in] load_command_address The address, in the remote process’ 87 //! address space, where the `LC_SEGMENT` or `LC_SEGMENT_64` load command 88 //! to be read is located. This address is determined by a Mach-O image 89 //! reader, such as MachOImageReader, as it walks Mach-O load commands. 90 //! \param[in] load_command_info A string to be used in logged messages. This 91 //! string is for diagnostic purposes only, and may be empty. 92 //! \param[in] module_name The path used to load the module. This string is 93 //! used to relax otherwise strict parsing rules for common modules with 94 //! known defects. 95 //! \param[in] file_type The module’s Mach-O file type. This is used to relax 96 //! otherwise strict parsing rules for common modules with known defects. 97 //! 98 //! \return `true` if the load command was read successfully. `false` 99 //! otherwise, with an appropriate message logged. 100 bool Initialize(ProcessReaderMac* process_reader, 101 mach_vm_address_t load_command_address, 102 const std::string& load_command_info, 103 const std::string& module_name, 104 uint32_t file_type); 105 106 //! \brief Sets the image’s slide value. 107 //! 108 //! This method must only be called once on an object, after Initialize() is 109 //! called successfully. It must be called before Address(), Size(), 110 //! GetSectionByName(), or GetSectionAtIndex() can be called. 111 //! 112 //! This method is provided because slide is a property of the image that 113 //! cannot be determined until at least some segments have been read. As such, 114 //! it is not necessarily known at the time that Initialize() is called. 115 void SetSlide(mach_vm_size_t slide); 116 117 //! \brief Returns the segment’s name. 118 //! 119 //! The segment’s name is taken from the load command’s `segname` field. 120 //! Common segment names are `"__TEXT"`, `"__DATA"`, and `"__LINKEDIT"`. 121 //! Symbolic constants for these common names are defined in 122 //! `<mach-o/loader.h>`. 123 std::string Name() const; 124 125 //! \return The segment’s actual load address in memory, adjusted for any 126 //! “slide”. 127 //! 128 //! \note For the segment’s preferred load address, not adjusted for slide, 129 //! use vmaddr(). 130 mach_vm_address_t Address() const; 131 132 //! \return The segment’s actual size address in memory, adjusted for any 133 //! growth in the case of a nonsliding segment. 134 //! 135 //! \note For the segment’s preferred size, not adjusted for growth, use 136 //! vmsize(). 137 mach_vm_address_t Size() const; 138 139 //! \brief The segment’s preferred load address. 140 //! 141 //! \return The segment’s preferred load address as stored in the Mach-O file. 142 //! 143 //! \note This value is not adjusted for any “slide” that may have occurred 144 //! when the image was loaded. Use Address() for a value adjusted for 145 //! slide. 146 //! 147 //! \sa MachOImageReader::GetSegmentByName() vmaddr()148 mach_vm_address_t vmaddr() const { return segment_command_.vmaddr; } 149 150 //! \brief Returns the segment’s size as mapped into memory. 151 //! 152 //! \note For non-sliding segments, this value is not adjusted for any growth 153 //! that may have occurred when the image was loaded. Use Size() for a 154 //! value adjusted for growth. vmsize()155 mach_vm_size_t vmsize() const { return segment_command_.vmsize; } 156 157 //! \brief Returns the file offset of the mapped segment in the file from 158 //! which it was mapped. 159 //! 160 //! The file offset is the difference between the beginning of the 161 //! `mach_header` or `mach_header_64` and the beginning of the segment’s 162 //! mapped region. For segments that are not mapped from a file (such as 163 //! `__PAGEZERO` segments), this will be `0`. fileoff()164 mach_vm_size_t fileoff() const { return segment_command_.fileoff; } 165 166 //! \brief Returns the number of sections in the segment. 167 //! 168 //! This will return `0` for a segment without any sections, typical for 169 //! `__PAGEZERO` and `__LINKEDIT` segments. 170 //! 171 //! Although the Mach-O file format uses a `uint32_t` for this field, there is 172 //! an overall limit of 255 sections in an entire Mach-O image file (not just 173 //! in a single segment) imposed by the symbol table format. Symbols will not 174 //! be able to reference anything in a section beyond the first 255 in a 175 //! Mach-O image file. nsects()176 uint32_t nsects() const { return segment_command_.nsects; } 177 178 //! \brief Obtain section information by section name. 179 //! 180 //! \param[in] section_name The name of the section to search for, without the 181 //! leading segment name. For example, use `"__text"`, not 182 //! `"__TEXT,__text"` or `"__TEXT.__text"`. 183 //! \param[out] address The actual address that the section was loaded at in 184 //! memory, taking any “slide” into account if the section did not load at 185 //! its preferred address as stored in the Mach-O image file. This 186 //! parameter can be `nullptr`. 187 //! 188 //! \return A pointer to the section information if it was found, or `nullptr` 189 //! if it was not found. The caller does not take ownership; the lifetime 190 //! of the returned object is scoped to the lifetime of this 191 //! MachOImageSegmentReader object. 192 //! 193 //! \note The process_types::section::addr field gives the section’s preferred 194 //! load address as stored in the Mach-O image file, and is not adjusted 195 //! for any “slide” that may have occurred when the image was loaded. 196 //! 197 //! \sa MachOImageReader::GetSectionByName() 198 const process_types::section* GetSectionByName( 199 const std::string& section_name, 200 mach_vm_address_t* address) const; 201 202 //! \brief Obtain section information by section index. 203 //! 204 //! \param[in] index The index of the section to return, in the order that it 205 //! appears in the segment load command. Unlike 206 //! MachOImageReader::GetSectionAtIndex(), this is a 0-based index. This 207 //! parameter must be in the range of valid indices aas reported by 208 //! nsects(). 209 //! \param[out] address The actual address that the section was loaded at in 210 //! memory, taking any “slide” into account if the section did not load at 211 //! its preferred address as stored in the Mach-O image file. This 212 //! parameter can be `nullptr`. 213 //! 214 //! \return A pointer to the section information. If \a index is out of range, 215 //! execution is aborted. The caller does not take ownership; the 216 //! lifetime of the returned object is scoped to the lifetime of this 217 //! MachOImageSegmentReader object. 218 //! 219 //! \note The process_types::section::addr field gives the section’s preferred 220 //! load address as stored in the Mach-O image file, and is not adjusted 221 //! for any “slide” that may have occurred when the image was loaded. 222 //! \note Unlike MachOImageReader::GetSectionAtIndex(), this method does not 223 //! accept out-of-range values for \a index, and aborts execution instead 224 //! of returning `nullptr` upon encountering an out-of-range value. This 225 //! is because this method is expected to be used in a loop that can be 226 //! limited to nsects() iterations, so an out-of-range error can be 227 //! treated more harshly as a logic error, as opposed to a data error. 228 //! 229 //! \sa MachOImageReader::GetSectionAtIndex() 230 const process_types::section* GetSectionAtIndex( 231 size_t index, 232 mach_vm_address_t* address) const; 233 234 //! Returns whether the segment slides. 235 //! 236 //! Most segments slide, but the `__PAGEZERO` segment does not, it grows 237 //! instead. This method identifies non-sliding segments in the same way that 238 //! the kernel does. 239 bool SegmentSlides() const; 240 241 //! \brief Returns a segment name string. 242 //! 243 //! Segment names may be 16 characters long, and are not necessarily 244 //! `NUL`-terminated. This function will return a segment name based on up to 245 //! the first 16 characters found at \a segment_name_c. 246 static std::string SegmentNameString(const char* segment_name_c); 247 248 //! \brief Returns a section name string. 249 //! 250 //! Section names may be 16 characters long, and are not necessarily 251 //! `NUL`-terminated. This function will return a section name based on up to 252 //! the first 16 characters found at \a section_name_c. 253 static std::string SectionNameString(const char* section_name_c); 254 255 //! \brief Returns a segment and section name string. 256 //! 257 //! A segment and section name string is composed of a segment name string 258 //! (see SegmentNameString()) and a section name string (see 259 //! SectionNameString()) separated by a comma. An example is 260 //! `"__TEXT,__text"`. 261 static std::string SegmentAndSectionNameString(const char* segment_name_c, 262 const char* section_name_c); 263 264 private: 265 //! \brief The internal implementation of Name(). 266 //! 267 //! This is identical to Name() but does not perform the 268 //! InitializationStateDcheck check. It may be called during initialization 269 //! provided that the caller only does so after segment_command_ has been 270 //! read successfully. 271 std::string NameInternal() const; 272 273 // The segment command data read from the remote process. 274 process_types::segment_command segment_command_; 275 276 // Section structures read from the remote process in the order that they are 277 // given in the remote process. 278 std::vector<process_types::section> sections_; 279 280 // Maps section names to indices into the sections_ vector. 281 std::map<std::string, size_t> section_map_; 282 283 // The image’s slide. Note that the segment’s slide may be 0 and not the value 284 // of the image’s slide if SegmentSlides() is false. In that case, the 285 // segment is extended instead of slid, so its size as loaded will be 286 // increased by this value. 287 mach_vm_size_t slide_; 288 289 InitializationStateDcheck initialized_; 290 InitializationStateDcheck initialized_slide_; 291 292 DISALLOW_COPY_AND_ASSIGN(MachOImageSegmentReader); 293 }; 294 295 } // namespace crashpad 296 297 #endif // CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_SEGMENT_READER_H_ 298