1 /* 2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. 3 * Use of this source code is governed by a BSD-style license that can be 4 * found in the LICENSE file. 5 */ 6 7 #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H 8 #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H 9 10 /* 11 * The SFI validator, and some utility classes it uses. 12 */ 13 14 #include <stdint.h> 15 #include <stdlib.h> 16 #include <vector> 17 18 #include "native_client/src/include/nacl_string.h" 19 #include "native_client/src/include/portability.h" 20 #include "native_client/src/trusted/validator_mips/address_set.h" 21 #include "native_client/src/trusted/validator_mips/decode.h" 22 #include "native_client/src/trusted/validator_mips/inst_classes.h" 23 #include "native_client/src/trusted/validator_mips/model.h" 24 25 namespace nacl_mips_val { 26 27 /* 28 * Forward declarations of classes used by-reference in the validator, and 29 * defined at the end of this file. 30 */ 31 class CodeSegment; 32 class DecodedInstruction; 33 class ProblemSink; 34 35 36 /* 37 * A simple model of an instruction bundle. Bundles consist of one or more 38 * instructions (two or more, in the useful case); the precise size is 39 * controlled by the parameters passed into SfiValidator, below. 40 */ 41 class Bundle { 42 public: Bundle(uint32_t virtual_base,uint32_t size_bytes)43 Bundle(uint32_t virtual_base, uint32_t size_bytes) 44 : virtual_base_(virtual_base), size_(size_bytes) {} 45 BeginAddr()46 uint32_t BeginAddr() const { return virtual_base_; } EndAddr()47 uint32_t EndAddr() const { return virtual_base_ + size_; } 48 49 bool operator!=(const Bundle &other) const { 50 // Note that all Bundles are currently assumed to be the same size. 51 return virtual_base_ != other.virtual_base_; 52 } 53 54 private: 55 uint32_t virtual_base_; 56 uint32_t size_; 57 }; 58 59 60 /* 61 * The SFI validator itself. The validator is controlled by the following 62 * inputs: 63 * bytes_per_bundle: the number of bytes in each bundle of instructions. 64 * Currently this tends to be 16, but we've evaluated alternatives. 65 * code_region_bytes: number of bytes in the code region, starting at address 66 * 0 and including the trampolines, etc. Must be a power of two. 67 * data_region_bits: number of bytes in the data region, starting at address 68 * 0 and including the code region. Must be a power of two. 69 * read_only_registers: registers that untrusted code must not alter (but may 70 * read). This currently applies to t6 - jump mask, t7 - load/store mask 71 * and t8 - tls index. 72 * data_address_registers: registers that must contain a valid data-region 73 * address at all times. This currently applies to the stack pointer and 74 * TLS register but could be extended to include a frame pointer for 75 * C-like languages. Adding register to data_address_registers only means 76 * that load/store access can be done without checks. Check for register 77 * value change still needs to be executed. 78 * 79 * The values of these inputs will typically be taken from the headers of 80 * untrusted code -- either by the ABI version they indicate, or (perhaps in 81 * the future) explicit indicators of what SFI model they follow. 82 */ 83 class SfiValidator { 84 public: 85 SfiValidator(uint32_t bytes_per_bundle, 86 uint32_t code_region_bytes, 87 uint32_t data_region_bytes, 88 nacl_mips_dec::RegisterList read_only_registers, 89 nacl_mips_dec::RegisterList data_address_registers); 90 91 /* 92 * The main validator entry point. Validates the provided CodeSegments, 93 * which must be in sorted order, reporting any problems through the 94 * ProblemSink. 95 * 96 * Returns true iff no problems were found. 97 */ 98 bool Validate(const std::vector<CodeSegment> &, ProblemSink *out); 99 100 // Returns true if validation did not depend on the code's base address. is_position_independent()101 bool is_position_independent() { 102 return is_position_independent_; 103 } 104 105 /* 106 * Checks whether the given Register always holds a valid data region address. 107 * This implies that the register is safe to use in unguarded stores. 108 */ 109 bool IsDataAddressRegister(nacl_mips_dec::Register) const; 110 data_address_mask()111 uint32_t data_address_mask() const { return data_address_mask_; } code_address_mask()112 uint32_t code_address_mask() const { return code_address_mask_; } code_region_bytes()113 uint32_t code_region_bytes() const { return code_region_bytes_; } bytes_per_bundle()114 uint32_t bytes_per_bundle() const { return bytes_per_bundle_; } code_region_start()115 uint32_t code_region_start() const { return code_region_start_; } trampoline_region_start()116 uint32_t trampoline_region_start() const { return trampoline_region_start_; } 117 read_only_registers()118 nacl_mips_dec::RegisterList read_only_registers() const { 119 return read_only_registers_; 120 } data_address_registers()121 nacl_mips_dec::RegisterList data_address_registers() const { 122 return data_address_registers_; 123 } 124 125 // Returns the Bundle containing a given address. 126 const Bundle BundleForAddress(uint32_t) const; 127 128 /* 129 * Change masks: this is useful for debugging and cannot be completely 130 * controlled with constructor arguments 131 */ ChangeMasks(uint32_t code_address_mask,uint32_t data_address_mask)132 void ChangeMasks(uint32_t code_address_mask, uint32_t data_address_mask) { 133 code_address_mask_ = code_address_mask; 134 data_address_mask_ = data_address_mask; 135 } 136 137 /* 138 * Find all the branch instructions which jump on the dest_address. 139 */ 140 bool FindBranch(const std::vector<CodeSegment> &segments, 141 const AddressSet &branches, 142 uint32_t dest_address, 143 std::vector<DecodedInstruction> *instrs) const; 144 145 private: 146 bool IsBundleHead(uint32_t address) const; 147 148 /* 149 * Validates a straight-line execution of the code, applying patterns. This 150 * is the first validation pass, which fills out the AddressSets for 151 * consumption by later analyses. 152 * branches: gets filled in with the address of every direct branch. 153 * branch_targets: gets filled in with the target address of every direct 154 * branch. 155 * critical: gets filled in with every address that isn't safe to jump to, 156 * because it would split an otherwise-safe pseudo-op. 157 * 158 * Returns true iff no problems were found. 159 */ 160 bool ValidateFallthrough(const CodeSegment &, ProblemSink *, 161 AddressSet *branches, AddressSet *branch_targets, 162 AddressSet *critical); 163 164 /* 165 * Factor of validate_fallthrough, above. Checks a single instruction using 166 * the instruction patterns defined in the .cc file, with two possible 167 * results: 168 * 1. No patterns matched, or all were safe: nothing happens. 169 * 2. Patterns matched and were unsafe: problems get sent to 'out'. 170 */ 171 bool ApplyPatterns(const DecodedInstruction &, ProblemSink *out); 172 173 /* 174 * Factor of validate_fallthrough, above. Checks a pair of instructions using 175 * the instruction patterns defined in the .cc file, with three possible 176 * results: 177 * 1. No patterns matched: nothing happens. 178 * 2. Patterns matched and were safe: the addresses are filled into 179 * 'critical' for use by the second pass. 180 * 3. Patterns matched and were unsafe: problems get sent to 'out'. 181 */ 182 bool ApplyPatterns(const DecodedInstruction &first, 183 const DecodedInstruction &second, AddressSet *critical, ProblemSink *out); 184 185 186 /* 187 * 2nd pass - checks if some branch instruction tries to jump onto the middle 188 * of the pseudo-instruction, and if some pseudo-instruction crosses bundle 189 * borders. 190 */ 191 bool ValidatePseudos(const SfiValidator &sfi, 192 const std::vector<CodeSegment> &segments, 193 const AddressSet &branches, 194 const AddressSet &branch_targets, 195 const AddressSet &critical, 196 ProblemSink *out); 197 198 uint32_t const bytes_per_bundle_; 199 uint32_t const code_region_bytes_; 200 uint32_t data_address_mask_; 201 uint32_t code_address_mask_; 202 203 // TODO(petarj): Think about pulling these values from some config header. 204 static uint32_t const code_region_start_ = 0x20000; 205 static uint32_t const trampoline_region_start_ = 0x10000; 206 207 // Registers which cannot be modified by untrusted code. 208 nacl_mips_dec::RegisterList read_only_registers_; 209 // Registers which must always contain a valid data region address. 210 nacl_mips_dec::RegisterList data_address_registers_; 211 const nacl_mips_dec::DecoderState *decode_state_; 212 // True if validation did not depend on the code's base address. 213 bool is_position_independent_; 214 }; 215 216 217 /* 218 * A facade that combines an Instruction with its address and a ClassDecoder. 219 * This makes the patterns substantially easier to write and read than managing 220 * all three variables separately. 221 * 222 * ClassDecoders do all decoding on-demand, with no caching. DecodedInstruction 223 * has knowledge of the validator, and pairs a ClassDecoder with a constant 224 * Instruction -- so it can cache commonly used values, and does. Caching 225 * safety and defs doubles validator performance. Add other values only 226 * under guidance of a profiler. 227 */ 228 class DecodedInstruction { 229 public: 230 DecodedInstruction(uint32_t vaddr, nacl_mips_dec::Instruction inst, 231 const nacl_mips_dec::ClassDecoder &decoder); 232 // We permit the default copy ctor and assignment operator. 233 addr()234 uint32_t addr() const { return vaddr_; } 235 236 // The methods below mirror those on ClassDecoder, but are cached and cheap. safety()237 nacl_mips_dec::SafetyLevel safety() const { return safety_; } 238 239 // The methods below pull values from ClassDecoder on demand. BaseAddressRegister()240 const nacl_mips_dec::Register BaseAddressRegister() const { 241 return decoder_->BaseAddressRegister(inst_); 242 } 243 DestGprReg()244 nacl_mips_dec::Register DestGprReg() const { 245 return decoder_->DestGprReg(inst_); 246 } 247 TargetReg()248 nacl_mips_dec::Register TargetReg() const { 249 return decoder_->TargetReg(inst_); 250 } 251 DestAddr()252 uint32_t DestAddr() const { 253 return decoder_->DestAddr(inst_, addr()); 254 } 255 HasDelaySlot()256 bool HasDelaySlot() const { 257 return decoder_-> HasDelaySlot(); 258 } 259 IsJal()260 bool IsJal() const { 261 return decoder_-> IsJal(); 262 } 263 IsMask(const nacl_mips_dec::Register & dest,const nacl_mips_dec::Register & mask)264 bool IsMask(const nacl_mips_dec::Register& dest, 265 const nacl_mips_dec::Register& mask) const { 266 return decoder_->IsMask(inst_, dest, mask); 267 } 268 IsJmpReg()269 bool IsJmpReg() const { 270 return decoder_->IsJmpReg(); 271 } 272 IsLoadStore()273 bool IsLoadStore() const { 274 return decoder_->IsLoadStore(); 275 } 276 IsLoadWord()277 bool IsLoadWord() const { 278 return decoder_->IsLoadWord(); 279 } 280 GetImm()281 uint32_t GetImm() const { 282 return decoder_->GetImm(inst_); 283 } 284 IsDirectJump()285 bool IsDirectJump() const { 286 return decoder_->IsDirectJump(); 287 } 288 IsDestGprReg(nacl_mips_dec::RegisterList rl)289 bool IsDestGprReg(nacl_mips_dec::RegisterList rl) const { 290 return rl.ContainsAny(nacl_mips_dec::RegisterList(DestGprReg())); 291 } 292 IsDataRegMask()293 bool IsDataRegMask() const { 294 return IsMask(DestGprReg(), nacl_mips_dec::Register::LoadStoreMask()); 295 } 296 297 private: 298 uint32_t vaddr_; 299 nacl_mips_dec::Instruction inst_; 300 const nacl_mips_dec::ClassDecoder *decoder_; 301 302 nacl_mips_dec::SafetyLevel safety_; 303 }; 304 305 306 /* 307 * Describes a memory region that contains executable code. Note that the code 308 * need not live in its final location -- we pretend the code lives at the 309 * provided start_addr, regardless of where the base pointer actually points. 310 */ 311 class CodeSegment { 312 public: CodeSegment(const uint8_t * base,uint32_t start_addr,size_t size)313 CodeSegment(const uint8_t *base, uint32_t start_addr, size_t size) 314 : base_(base), start_addr_(start_addr), size_(size) {} 315 BeginAddr()316 uint32_t BeginAddr() const { return start_addr_; } EndAddr()317 uint32_t EndAddr() const { return start_addr_ + size_; } size()318 uint32_t size() const { return size_; } ContainsAddress(uint32_t a)319 bool ContainsAddress(uint32_t a) const { 320 return (a >= BeginAddr()) && (a < EndAddr()); 321 } 322 323 const nacl_mips_dec::Instruction operator[](uint32_t address) const { 324 const uint8_t *element = &base_[address - start_addr_]; 325 return nacl_mips_dec::Instruction( 326 *reinterpret_cast<const uint32_t *>(element)); 327 } 328 329 bool operator<(const CodeSegment &other) const { 330 return start_addr_ < other.start_addr_; 331 } 332 333 private: 334 const uint8_t *base_; 335 uint32_t start_addr_; 336 size_t size_; 337 }; 338 339 340 /* 341 * A class that consumes reports of validation problems, and may decide whether 342 * to continue validating, or early-exit. 343 * 344 * In a sel_ldr context, we early-exit at the first problem we find. In an SDK 345 * context, however, we collect more reports to give the developer feedback; 346 * even then it may be desirable to exit after the first, say, 200 reports. 347 */ 348 class ProblemSink { 349 public: ~ProblemSink()350 virtual ~ProblemSink() {} 351 352 /* 353 * Reports a problem in untrusted code. 354 * vaddr: the virtual address where the problem occurred. Note that this is 355 * probably not the address of memory that contains the offending 356 * instruction, since we allow CodeSegments to lie about their base 357 * addresses. 358 * safety: the safety level of the instruction, as reported by the decoder. 359 * This may be MAY_BE_SAFE while still indicating a problem. 360 * problem_code: a constant string, defined below, that uniquely identifies 361 * the problem. These are not intended to be human-readable, and should 362 * be looked up for localization and presentation to the developer. 363 * ref_vaddr: A second virtual address of more code that affected the 364 * decision -- typically a branch target. 365 */ 366 virtual void ReportProblem(uint32_t vaddr, nacl_mips_dec::SafetyLevel safety, 367 const nacl::string &problem_code, uint32_t ref_vaddr = 0) { 368 UNREFERENCED_PARAMETER(vaddr); 369 UNREFERENCED_PARAMETER(safety); 370 UNREFERENCED_PARAMETER(problem_code); 371 UNREFERENCED_PARAMETER(ref_vaddr); 372 } 373 374 /* 375 * Called after each invocation of report_problem. If this returns false, 376 * the validator exits. 377 */ ShouldContinue()378 virtual bool ShouldContinue() { return false; } 379 }; 380 381 /* 382 * Strings used to describe the current set of validator problems. These may 383 * be worth splitting into a separate header file, so that dev tools can 384 * process them into localized messages without needing to pull in the whole 385 * validator...we'll see. 386 */ 387 388 // An instruction is unsafe -- more information in the SafetyLevel. 389 const char * const kProblemUnsafe = "kProblemUnsafe"; 390 // A branch would break a pseudo-operation pattern. 391 const char * const kProblemBranchSplitsPattern = "kProblemBranchSplitsPattern"; 392 // A branch targets an invalid code address (out of segment). 393 const char * const kProblemBranchInvalidDest = "kProblemBranchInvalidDest"; 394 // A load/store uses an unsafe (non-masked) base address. 395 const char * const kProblemUnsafeLoadStore = "kProblemUnsafeLoadStore"; 396 // A thread pointer load/store is unsafe. 397 const char * const kProblemUnsafeLoadStoreThreadPointer = 398 "kProblemUnsafeLoadStoreThreadPointer"; 399 // An instruction updates a data-address register (e.g. SP) without masking. 400 const char * const kProblemUnsafeDataWrite = "kProblemUnsafeDataWrite"; 401 // An instruction updates a read-only register (e.g. t6, t7, t8). 402 const char * const kProblemReadOnlyRegister = "kProblemReadOnlyRegister"; 403 // A pseudo-op pattern crosses a bundle boundary. 404 const char * const kProblemPatternCrossesBundle = 405 "kProblemPatternCrossesBundle"; 406 // A linking branch instruction is not in the last bundle slot. 407 const char * const kProblemMisalignedCall = "kProblemMisalignedCall"; 408 // A data register is found in a branch delay slot. 409 const char * const kProblemDataRegInDelaySlot = "kProblemDataRegInDelaySlot"; 410 // A jump to trampoline instruction which is not a start of a bundle. 411 const char * const kProblemUnalignedJumpToTrampoline = 412 "kProblemUnalignedJumpToTrampoline"; 413 // A jump register instruction is not guarded. 414 const char * const kProblemUnsafeJumpRegister = "kProblemUnsafeJumpRegister"; 415 // Two consecutive branches/jumps. Branch/jump in the delay slot. 416 const char * const kProblemBranchInDelaySlot = "kProblemBranchInDelaySlot"; 417 } // namespace nacl_mips_val 418 419 #endif // NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H 420