1 /*
2  * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H
8 #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H
9 
10 /*
11  * The SFI validator, and some utility classes it uses.
12  */
13 
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <vector>

#include "native_client/src/include/nacl_string.h"
#include "native_client/src/include/portability.h"
#include "native_client/src/trusted/validator_mips/address_set.h"
#include "native_client/src/trusted/validator_mips/decode.h"
#include "native_client/src/trusted/validator_mips/inst_classes.h"
#include "native_client/src/trusted/validator_mips/model.h"
24 
25 namespace nacl_mips_val {
26 
/*
 * Forward declarations of the classes that the validator refers to by
 * reference or by pointer.  Their full definitions appear later in this file.
 */
class ProblemSink;
class DecodedInstruction;
class CodeSegment;
34 
35 
/*
 * Minimal description of one instruction bundle.  A bundle holds one or more
 * instructions (two or more in any useful configuration); its exact size is
 * determined by the parameters handed to SfiValidator, below.
 */
class Bundle {
 public:
  // Describes the bundle that begins at virtual_base and spans size_bytes.
  Bundle(uint32_t virtual_base, uint32_t size_bytes)
      : virtual_base_(virtual_base),
        size_(size_bytes) {
  }

  // Address of the first byte of the bundle.
  uint32_t BeginAddr() const {
    return virtual_base_;
  }

  // Address one past the last byte of the bundle (unsigned 32-bit arithmetic).
  uint32_t EndAddr() const {
    return virtual_base_ + size_;
  }

  // Two bundles differ when their base addresses differ.  The size is not
  // compared: all Bundles are currently assumed to be the same size.
  bool operator!=(const Bundle &other) const {
    return !(virtual_base_ == other.virtual_base_);
  }

 private:
  uint32_t virtual_base_;
  uint32_t size_;
};
58 
59 
60 /*
61  * The SFI validator itself.  The validator is controlled by the following
62  * inputs:
63  *   bytes_per_bundle: the number of bytes in each bundle of instructions.
64  *       Currently this tends to be 16, but we've evaluated alternatives.
65  *   code_region_bytes: number of bytes in the code region, starting at address
66  *       0 and including the trampolines, etc.  Must be a power of two.
67  *   data_region_bits: number of bytes in the data region, starting at address
68  *       0 and including the code region.  Must be a power of two.
69  *   read_only_registers: registers that untrusted code must not alter (but may
70  *       read).  This currently applies to t6 - jump mask, t7 - load/store mask
71  *       and t8 - tls index.
72  *   data_address_registers: registers that must contain a valid data-region
73  *       address at all times.  This currently applies to the stack pointer and
74  *       TLS register but could be extended to include a frame pointer for
75  *       C-like languages. Adding register to data_address_registers only means
76  *       that load/store access can be done without checks. Check for register
77  *       value change still needs to be executed.
78  *
79  * The values of these inputs will typically be taken from the headers of
80  * untrusted code -- either by the ABI version they indicate, or (perhaps in
81  * the future) explicit indicators of what SFI model they follow.
82  */
83 class SfiValidator {
84  public:
85   SfiValidator(uint32_t bytes_per_bundle,
86                uint32_t code_region_bytes,
87                uint32_t data_region_bytes,
88                nacl_mips_dec::RegisterList read_only_registers,
89                nacl_mips_dec::RegisterList data_address_registers);
90 
91   /*
92    * The main validator entry point.  Validates the provided CodeSegments,
93    * which must be in sorted order, reporting any problems through the
94    * ProblemSink.
95    *
96    * Returns true iff no problems were found.
97    */
98   bool Validate(const std::vector<CodeSegment> &, ProblemSink *out);
99 
100   // Returns true if validation did not depend on the code's base address.
is_position_independent()101   bool is_position_independent() {
102     return is_position_independent_;
103   }
104 
105   /*
106    * Checks whether the given Register always holds a valid data region address.
107    * This implies that the register is safe to use in unguarded stores.
108    */
109   bool IsDataAddressRegister(nacl_mips_dec::Register) const;
110 
data_address_mask()111   uint32_t data_address_mask() const { return data_address_mask_; }
code_address_mask()112   uint32_t code_address_mask() const { return code_address_mask_; }
code_region_bytes()113   uint32_t code_region_bytes() const { return code_region_bytes_; }
bytes_per_bundle()114   uint32_t bytes_per_bundle() const { return bytes_per_bundle_; }
code_region_start()115   uint32_t code_region_start() const { return code_region_start_; }
trampoline_region_start()116   uint32_t trampoline_region_start() const { return trampoline_region_start_; }
117 
read_only_registers()118   nacl_mips_dec::RegisterList read_only_registers() const {
119     return read_only_registers_;
120   }
data_address_registers()121   nacl_mips_dec::RegisterList data_address_registers() const {
122     return data_address_registers_;
123   }
124 
125   // Returns the Bundle containing a given address.
126   const Bundle BundleForAddress(uint32_t) const;
127 
128   /*
129    * Change masks: this is useful for debugging and cannot be completely
130    *               controlled with constructor arguments
131    */
ChangeMasks(uint32_t code_address_mask,uint32_t data_address_mask)132   void ChangeMasks(uint32_t code_address_mask, uint32_t data_address_mask) {
133     code_address_mask_ = code_address_mask;
134     data_address_mask_ = data_address_mask;
135   }
136 
137   /*
138    * Find all the branch instructions which jump on the dest_address.
139    */
140   bool FindBranch(const std::vector<CodeSegment> &segments,
141                   const AddressSet &branches,
142                   uint32_t dest_address,
143                   std::vector<DecodedInstruction> *instrs) const;
144 
145  private:
146   bool IsBundleHead(uint32_t address) const;
147 
148   /*
149    * Validates a straight-line execution of the code, applying patterns.  This
150    * is the first validation pass, which fills out the AddressSets for
151    * consumption by later analyses.
152    *   branches: gets filled in with the address of every direct branch.
153    *   branch_targets: gets filled in with the target address of every direct
154    *   branch.
155    *   critical: gets filled in with every address that isn't safe to jump to,
156    *       because it would split an otherwise-safe pseudo-op.
157    *
158    * Returns true iff no problems were found.
159    */
160   bool ValidateFallthrough(const CodeSegment &, ProblemSink *,
161                            AddressSet *branches, AddressSet *branch_targets,
162                            AddressSet *critical);
163 
164   /*
165    * Factor of validate_fallthrough, above.  Checks a single instruction using
166    * the instruction patterns defined in the .cc file, with two possible
167    * results:
168    *   1. No patterns matched, or all were safe: nothing happens.
169    *   2. Patterns matched and were unsafe: problems get sent to 'out'.
170    */
171   bool ApplyPatterns(const DecodedInstruction &, ProblemSink *out);
172 
173   /*
174    * Factor of validate_fallthrough, above.  Checks a pair of instructions using
175    * the instruction patterns defined in the .cc file, with three possible
176    * results:
177    *   1. No patterns matched: nothing happens.
178    *   2. Patterns matched and were safe: the addresses are filled into
179    *      'critical' for use by the second pass.
180    *   3. Patterns matched and were unsafe: problems get sent to 'out'.
181    */
182   bool ApplyPatterns(const DecodedInstruction &first,
183       const DecodedInstruction &second, AddressSet *critical, ProblemSink *out);
184 
185 
186   /*
187    * 2nd pass - checks if some branch instruction tries to jump onto the middle
188    * of the pseudo-instruction, and if some pseudo-instruction crosses bundle
189    * borders.
190    */
191   bool ValidatePseudos(const SfiValidator &sfi,
192                        const std::vector<CodeSegment> &segments,
193                        const AddressSet &branches,
194                        const AddressSet &branch_targets,
195                        const AddressSet &critical,
196                        ProblemSink *out);
197 
198   uint32_t const bytes_per_bundle_;
199   uint32_t const code_region_bytes_;
200   uint32_t data_address_mask_;
201   uint32_t code_address_mask_;
202 
203   // TODO(petarj): Think about pulling these values from some config header.
204   static uint32_t const code_region_start_ = 0x20000;
205   static uint32_t const trampoline_region_start_ = 0x10000;
206 
207   // Registers which cannot be modified by untrusted code.
208   nacl_mips_dec::RegisterList read_only_registers_;
209   // Registers which must always contain a valid data region address.
210   nacl_mips_dec::RegisterList data_address_registers_;
211   const nacl_mips_dec::DecoderState *decode_state_;
212   // True if validation did not depend on the code's base address.
213   bool is_position_independent_;
214 };
215 
216 
217 /*
218  * A facade that combines an Instruction with its address and a ClassDecoder.
219  * This makes the patterns substantially easier to write and read than managing
220  * all three variables separately.
221  *
222  * ClassDecoders do all decoding on-demand, with no caching.  DecodedInstruction
223  * has knowledge of the validator, and pairs a ClassDecoder with a constant
224  * Instruction -- so it can cache commonly used values, and does.  Caching
225  * safety and defs doubles validator performance.  Add other values only
226  * under guidance of a profiler.
227  */
228 class DecodedInstruction {
229  public:
230   DecodedInstruction(uint32_t vaddr, nacl_mips_dec::Instruction inst,
231       const nacl_mips_dec::ClassDecoder &decoder);
232   // We permit the default copy ctor and assignment operator.
233 
addr()234   uint32_t addr() const { return vaddr_; }
235 
236   // The methods below mirror those on ClassDecoder, but are cached and cheap.
safety()237   nacl_mips_dec::SafetyLevel safety() const { return safety_; }
238 
239   // The methods below pull values from ClassDecoder on demand.
BaseAddressRegister()240   const nacl_mips_dec::Register BaseAddressRegister() const {
241     return decoder_->BaseAddressRegister(inst_);
242   }
243 
DestGprReg()244   nacl_mips_dec::Register DestGprReg() const {
245     return decoder_->DestGprReg(inst_);
246   }
247 
TargetReg()248   nacl_mips_dec::Register TargetReg() const {
249     return decoder_->TargetReg(inst_);
250   }
251 
DestAddr()252   uint32_t DestAddr() const {
253     return decoder_->DestAddr(inst_, addr());
254   }
255 
HasDelaySlot()256   bool HasDelaySlot() const {
257     return decoder_-> HasDelaySlot();
258   }
259 
IsJal()260   bool IsJal() const {
261     return decoder_-> IsJal();
262   }
263 
IsMask(const nacl_mips_dec::Register & dest,const nacl_mips_dec::Register & mask)264   bool IsMask(const nacl_mips_dec::Register& dest,
265               const nacl_mips_dec::Register& mask) const {
266     return decoder_->IsMask(inst_, dest, mask);
267   }
268 
IsJmpReg()269   bool IsJmpReg() const {
270     return decoder_->IsJmpReg();
271   }
272 
IsLoadStore()273   bool IsLoadStore() const {
274     return decoder_->IsLoadStore();
275   }
276 
IsLoadWord()277   bool IsLoadWord() const {
278     return decoder_->IsLoadWord();
279   }
280 
GetImm()281   uint32_t GetImm() const {
282     return decoder_->GetImm(inst_);
283   }
284 
IsDirectJump()285   bool IsDirectJump() const {
286     return decoder_->IsDirectJump();
287   }
288 
IsDestGprReg(nacl_mips_dec::RegisterList rl)289   bool IsDestGprReg(nacl_mips_dec::RegisterList rl) const {
290     return rl.ContainsAny(nacl_mips_dec::RegisterList(DestGprReg()));
291   }
292 
IsDataRegMask()293   bool IsDataRegMask() const {
294     return IsMask(DestGprReg(), nacl_mips_dec::Register::LoadStoreMask());
295   }
296 
297  private:
298   uint32_t vaddr_;
299   nacl_mips_dec::Instruction inst_;
300   const nacl_mips_dec::ClassDecoder *decoder_;
301 
302   nacl_mips_dec::SafetyLevel safety_;
303 };
304 
305 
306 /*
307  * Describes a memory region that contains executable code.  Note that the code
308  * need not live in its final location -- we pretend the code lives at the
309  * provided start_addr, regardless of where the base pointer actually points.
310  */
311 class CodeSegment {
312  public:
CodeSegment(const uint8_t * base,uint32_t start_addr,size_t size)313   CodeSegment(const uint8_t *base, uint32_t start_addr, size_t size)
314       : base_(base), start_addr_(start_addr), size_(size) {}
315 
BeginAddr()316   uint32_t BeginAddr() const { return start_addr_; }
EndAddr()317   uint32_t EndAddr() const { return start_addr_ + size_; }
size()318   uint32_t size() const { return size_; }
ContainsAddress(uint32_t a)319   bool ContainsAddress(uint32_t a) const {
320     return (a >= BeginAddr()) && (a < EndAddr());
321   }
322 
323   const nacl_mips_dec::Instruction operator[](uint32_t address) const {
324     const uint8_t *element = &base_[address - start_addr_];
325     return nacl_mips_dec::Instruction(
326         *reinterpret_cast<const uint32_t *>(element));
327   }
328 
329   bool operator<(const CodeSegment &other) const {
330     return start_addr_ < other.start_addr_;
331   }
332 
333  private:
334   const uint8_t *base_;
335   uint32_t start_addr_;
336   size_t size_;
337 };
338 
339 
340 /*
341  * A class that consumes reports of validation problems, and may decide whether
342  * to continue validating, or early-exit.
343  *
344  * In a sel_ldr context, we early-exit at the first problem we find.  In an SDK
345  * context, however, we collect more reports to give the developer feedback;
346  * even then it may be desirable to exit after the first, say, 200 reports.
347  */
348 class ProblemSink {
349  public:
~ProblemSink()350   virtual ~ProblemSink() {}
351 
352   /*
353    * Reports a problem in untrusted code.
354    *  vaddr: the virtual address where the problem occurred.  Note that this is
355    *      probably not the address of memory that contains the offending
356    *      instruction, since we allow CodeSegments to lie about their base
357    *      addresses.
358    *  safety: the safety level of the instruction, as reported by the decoder.
359    *      This may be MAY_BE_SAFE while still indicating a problem.
360    *  problem_code: a constant string, defined below, that uniquely identifies
361    *      the problem.  These are not intended to be human-readable, and should
362    *      be looked up for localization and presentation to the developer.
363    *  ref_vaddr: A second virtual address of more code that affected the
364    *      decision -- typically a branch target.
365    */
366   virtual void ReportProblem(uint32_t vaddr, nacl_mips_dec::SafetyLevel safety,
367       const nacl::string &problem_code, uint32_t ref_vaddr = 0) {
368     UNREFERENCED_PARAMETER(vaddr);
369     UNREFERENCED_PARAMETER(safety);
370     UNREFERENCED_PARAMETER(problem_code);
371     UNREFERENCED_PARAMETER(ref_vaddr);
372   }
373 
374   /*
375    * Called after each invocation of report_problem.  If this returns false,
376    * the validator exits.
377    */
ShouldContinue()378   virtual bool ShouldContinue() { return false; }
379 };
380 
/*
 * Problem codes reported by the validator.  Each code is a string spelling its
 * own identifier.  These may be worth splitting into a separate header file,
 * so that dev tools can process them into localized messages without needing
 * to pull in the whole validator...we'll see.
 */

// The instruction itself is unsafe; the SafetyLevel carries the details.
const char * const kProblemUnsafe =
    "kProblemUnsafe";
// A branch lands inside a pseudo-operation pattern and would break it.
const char * const kProblemBranchSplitsPattern =
    "kProblemBranchSplitsPattern";
// A branch goes to an invalid code address (outside the segment).
const char * const kProblemBranchInvalidDest =
    "kProblemBranchInvalidDest";
// A load/store goes through an unsafe (non-masked) base address.
const char * const kProblemUnsafeLoadStore =
    "kProblemUnsafeLoadStore";
// A load/store through the thread pointer is unsafe.
const char * const kProblemUnsafeLoadStoreThreadPointer =
    "kProblemUnsafeLoadStoreThreadPointer";
// A data-address register (e.g. SP) is updated without masking.
const char * const kProblemUnsafeDataWrite =
    "kProblemUnsafeDataWrite";
// A read-only register (e.g. t6, t7, t8) is updated.
const char * const kProblemReadOnlyRegister =
    "kProblemReadOnlyRegister";
// A pseudo-op pattern straddles a bundle boundary.
const char * const kProblemPatternCrossesBundle =
    "kProblemPatternCrossesBundle";
// A linking branch instruction is not in the last bundle slot.
const char * const kProblemMisalignedCall =
    "kProblemMisalignedCall";
// A data register is found in a branch delay slot.
const char * const kProblemDataRegInDelaySlot =
    "kProblemDataRegInDelaySlot";
// A jump to the trampoline that does not target the start of a bundle.
const char * const kProblemUnalignedJumpToTrampoline =
    "kProblemUnalignedJumpToTrampoline";
// A jump-register instruction is not guarded.
const char * const kProblemUnsafeJumpRegister =
    "kProblemUnsafeJumpRegister";
// Two consecutive branches/jumps, i.e. a branch/jump in a delay slot.
const char * const kProblemBranchInDelaySlot =
    "kProblemBranchInDelaySlot";
417 }  // namespace nacl_mips_val
418 
419 #endif  // NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_MIPS_VALIDATOR_H
420