1 /*
2  * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_V2_VALIDATOR_H
8 #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_V2_VALIDATOR_H
9 
10 /*
11  * The SFI validator, and some utility classes it uses.
12  */
13 
14 #include <limits>
15 #include <stdlib.h>
16 #include <vector>
17 
18 #include "native_client/src/include/nacl_compiler_annotations.h"
19 #include "native_client/src/include/nacl_string.h"
20 #include "native_client/src/include/portability.h"
21 #include "native_client/src/shared/platform/nacl_check.h"
22 #include "native_client/src/trusted/validator/ncvalidate.h"
23 #include "native_client/src/trusted/validator_arm/address_set.h"
24 #include "native_client/src/trusted/cpu_features/arch/arm/cpu_arm.h"
25 #include "native_client/src/trusted/validator_arm/gen/arm32_decode.h"
26 #include "native_client/src/trusted/validator_arm/inst_classes.h"
27 #include "native_client/src/trusted/validator_arm/model.h"
28 
29 namespace nacl_arm_val {
30 
// These classes are referenced by SfiValidator before their full definitions,
// which appear further down in this header.
class CodeSegment;
class DecodedInstruction;
class ProblemSink;
36 
// Minimal description of one instruction bundle: a base address plus a length
// in bytes.  A bundle holds one or more instructions (at least two in any
// useful configuration); the actual bundle size is chosen by the parameters
// handed to SfiValidator, below.
class Bundle {
 public:
  // virtual_base - address of the first byte of the bundle.
  // size_bytes   - length of the bundle, in bytes.
  Bundle(uint32_t virtual_base, uint32_t size_bytes)
      : virtual_base_(virtual_base),
        size_(size_bytes) {
  }

  // Address of the first byte in the bundle.
  uint32_t begin_addr() const {
    return virtual_base_;
  }

  // Address one past the last byte in the bundle (base plus size).
  uint32_t end_addr() const {
    return begin_addr() + size_;
  }

  // Two bundles compare equal when they start at the same address.  The size
  // is deliberately not compared: all Bundles are currently assumed to be the
  // same size.
  bool operator==(const Bundle& other) const {
    return other.virtual_base_ == virtual_base_;
  }

 private:
  uint32_t virtual_base_;
  uint32_t size_;
};
57 
58 // The SFI validator itself.  The validator is controlled by the following
59 // inputs:
60 //   bytes_per_bundle - the number of bytes in each bundle of instructions.
61 //       Currently this tends to be 16, but we've evaluated alternatives.
62 //       Must be a power of two.
63 //   code_region_bytes - number of bytes in the code region, starting at address
64 //       0 and including the trampolines, etc.  Must be a power of two.
65 //   data_region_bits - number of bytes in the data region, starting at address
66 //       0 and including the code region.  Must be a power of two.
67 //   read_only_registers - registers that untrusted code must not alter (but may
68 //       read). This currently applies to r9, where we store some thread state.
69 //   data_address_registers - registers that must contain a valid data-region
70 //       address at all times.  This currently applies to the stack pointer, but
71 //       could be extended to include a frame pointer for C-like languages.
72 //   cpu_features - the ARM CPU whose features should be considered during
73 //       validation. This matters because some CPUs don't support some
74 //       instructions, leak information or have erratas when others do not,
75 //       yet we still want to emit performant code for the given target.
76 //
77 // The values of these inputs will typically be taken from the headers of
78 // untrusted code -- either by the ABI version they indicate, or (perhaps in
79 // the future) explicit indicators of what SFI model they follow.
80 class SfiValidator {
81  public:
82   SfiValidator(uint32_t bytes_per_bundle,
83                uint32_t code_region_bytes,
84                uint32_t data_region_bytes,
85                nacl_arm_dec::RegisterList read_only_registers,
86                nacl_arm_dec::RegisterList data_address_registers,
87                const NaClCPUFeaturesArm *cpu_features);
88 
89   explicit SfiValidator(const SfiValidator& v);
90 
91   // The main validator entry point.  Validates the provided CodeSegments,
92   // which must be in sorted order, reporting any problems through the
93   // ProblemSink.
94   //
95   // Returns true iff no problems were found.
validate(const std::vector<CodeSegment> & segments,ProblemSink * out)96   bool validate(const std::vector<CodeSegment>& segments, ProblemSink* out) {
97     return find_violations(segments, out) == nacl_arm_dec::kNoViolations;
98   }
99 
100   // Returns true if validation did not depend on the code's base address.
is_position_independent()101   bool is_position_independent() {
102     return is_position_independent_;
103   }
104 
105   // Alternate validator entry point. Validates the provided
106   // CodeSegments, which must be in sorted order, reporting any
107   // problems through the ProblemSink.
108   //
109   // Returns the violation set of found violations. Note: if problem
110   // sink short ciruits the validation of all code (via method
111   // should_continue), this set may not contain all types of
112   // violations found. All that this method guarantees is if the code
113   // has validation violations, the returned set will be non-empty.
114   //
115   // Note: This version of validating is useful for testing, when one
116   // might want to know why the code did not validate.
117   nacl_arm_dec::ViolationSet find_violations(
118       const std::vector<CodeSegment>& segments, ProblemSink* out);
119 
120   // Entry point for validation of dynamic code replacement. Allows
121   // micromodifications of dynamically generated code in form of
122   // constant updates for inline caches and similar VM techniques.
123   // Very minimal modifications allowed, essentially only immediate
124   // value update for MOV or ORR instruction.
125   // Returns true iff no problems were found.
126   bool ValidateSegmentPair(const CodeSegment& old_code,
127                            const CodeSegment& new_code,
128                            ProblemSink* out);
129 
130   // Entry point for dynamic code creation. Copies code from
131   // source segment to destination, performing validation
132   // and accounting for need of safe handling of cases,
133   // where code being replaced is executed.
134   // Returns true iff no problems were found.
135   bool CopyCode(const CodeSegment& source_code,
136                 CodeSegment* dest_code,
137                 NaClCopyInstructionFunc copy_func,
138                 ProblemSink* out);
139 
140   // A 2-dimensional array, defined on the Condition of two
141   // instructions, defining when we can statically prove that the
142   // conditions of the first instruction implies the conditions of the
143   // second instruction.
144   //
145   // Note: The first index (i.e. row) corresponds to the condition of
146   // the first instruction, while the second index (i.e. column)
147   // corresponds to the condition of the second instruction.
148   //
149   // Note: The order the instructions execute is not important in
150   // this array. The context defines which instruction, of the
151   // instruction pair being compared, appears first.
152   //
153   // Note: The decoder should prevent UNCONDITIONAL (0b1111) from ever
154   // occurring, but we include entries for it out of paranoia, which also
155   // happens to make the table 16x16, which is easier to index into.
156   static const bool
157   condition_implies[nacl_arm_dec::Instruction::kConditionSize + 1]
158                    [nacl_arm_dec::Instruction::kConditionSize + 1];
159 
160   // Checks whether the given Register always holds a valid data region address.
161   // This implies that the register is safe to use in unguarded stores.
is_data_address_register(nacl_arm_dec::Register r)162   bool is_data_address_register(nacl_arm_dec::Register r) const {
163     return data_address_registers_.Contains(r);
164   }
165 
166   // Number of A32 instructions per bundle.
InstructionsPerBundle()167   uint32_t InstructionsPerBundle() const {
168     return bytes_per_bundle_ / (nacl_arm_dec::kArm32InstSize / 8);
169   }
170 
code_address_mask()171   uint32_t code_address_mask() const {
172     return ~(code_region_bytes_ - 1) | (bytes_per_bundle_ - 1);
173   }
data_address_mask()174   uint32_t data_address_mask() const {
175     return ~(data_region_bytes_ - 1);
176   }
177 
read_only_registers()178   nacl_arm_dec::RegisterList read_only_registers() const {
179     return read_only_registers_;
180   }
data_address_registers()181   nacl_arm_dec::RegisterList data_address_registers() const {
182     return data_address_registers_;
183   }
184 
CpuFeatures()185   const NaClCPUFeaturesArm *CpuFeatures() const {
186     return &cpu_features_;
187   }
188 
conditional_memory_access_allowed_for_sfi()189   bool conditional_memory_access_allowed_for_sfi() const {
190     return NaClGetCPUFeatureArm(CpuFeatures(),
191                                 NaClCPUFeatureArm_CanUseTstMem) != 0;
192   }
193 
194   // Utility function that applies the decoder of the validator.
decode(nacl_arm_dec::Instruction inst)195   const nacl_arm_dec::ClassDecoder& decode(
196       nacl_arm_dec::Instruction inst) const {
197     return decode_state_.decode(inst);
198   }
199 
200   // Returns the Bundle containing a given address.
201   inline const Bundle bundle_for_address(uint32_t address) const;
202 
203   // Returns true if both addresses are in the same bundle.
204   inline bool in_same_bundle(const DecodedInstruction& first,
205                              const DecodedInstruction& second) const;
206 
207   // Checks that both instructions can be in the same bundle,
208   // add updates the critical set to include the second instruction,
209   // since it can't be safely jumped to. If the instruction crosses
210   // a bundle, a set with the given violation will be returned.
211   inline nacl_arm_dec::ViolationSet validate_instruction_pair_allowed(
212       const DecodedInstruction& first,
213       const DecodedInstruction& second,
214       AddressSet* critical,
215       nacl_arm_dec::Violation violation) const;
216 
217   // Copy the given validator state.
218   SfiValidator& operator=(const SfiValidator& v);
219 
220   // Returns true if address is the first address of a bundle.
is_bundle_head(uint32_t address)221   bool is_bundle_head(uint32_t address) const {
222     return (address & (bytes_per_bundle_ - 1)) == 0;
223   }
224 
225   // Returns true if address is on a valid inst boundary
226   // and is not within a pseudo instruction
227   bool is_valid_inst_boundary(const CodeSegment& code, uint32_t addr);
228 
229  private:
230   // The SfiValidator constructor could have been given invalid values.
231   // Returns true the values were bad, and send the details to the ProblemSink.
232   // This method should be called from every public validation method.
233   bool ConstructionFailed(ProblemSink* out);
234 
235   // Validates a straight-line execution of the code, applying patterns.  This
236   // is the first validation pass, which fills out the AddressSets for
237   // consumption by later analyses.
238   //   branches - gets filled in with the address of every direct branch.
239   //   critical - gets filled in with every address that isn't safe to jump to,
240   //       because it would split an otherwise-safe pseudo-op.
241   //
242   // Returns the violation set of found violations. Note: if problem
243   // sink short ciruits the validation of all code (via method
244   // should_continue), this set may not contain all types of
245   // violations found. All that this method guarantees is if the code
246   // has validation violations, the returned set will be non-empty.
247   nacl_arm_dec::ViolationSet validate_fallthrough(
248       const CodeSegment& segment, ProblemSink* out,
249       AddressSet* branches, AddressSet* critical);
250 
251   // Validates all branches found by a previous pass, checking
252   // destinations.  Returns the violation set of found branch
253   // violations. Note: if problem sink short ciruits the validation of
254   // all code (via method should_continue), this set may not contain
255   // all types of violations found. All that this method guarantees is
256   // if the code has validation violations, the returned set will be
257   // non-empty.
258   nacl_arm_dec::ViolationSet validate_branches(
259       const std::vector<CodeSegment>& segments,
260       const AddressSet& branches, const AddressSet& critical,
261       ProblemSink* out);
262 
263   NaClCPUFeaturesArm cpu_features_;
264   uint32_t bytes_per_bundle_;
265   uint32_t code_region_bytes_;
266   uint32_t data_region_bytes_;
267   // Registers which cannot be modified by untrusted code.
268   nacl_arm_dec::RegisterList read_only_registers_;
269   // Registers which must always contain a valid data region address.
270   nacl_arm_dec::RegisterList data_address_registers_;
271   // Defines the decoder parser to use.
272   const nacl_arm_dec::Arm32DecoderState decode_state_;
273   // True if construction failed and further validation should be prevented.
274   bool construction_failed_;
275   // True if validation did not depend on the code's base address.
276   bool is_position_independent_;
277 };
278 
279 
280 // A facade that combines an Instruction with its address and a ClassDecoder.
281 // This makes the patterns substantially easier to write and read than managing
282 // all three variables separately.
283 //
284 // ClassDecoders do all decoding on-demand, with no caching.  DecodedInstruction
285 // has knowledge of the validator, and pairs a ClassDecoder with a constant
286 // Instruction -- so it can cache commonly used values, and does.  Caching
287 // safety and defs doubles validator performance.  Add other values only
288 // under guidance of a profiler.
289 class DecodedInstruction {
290  public:
DecodedInstruction(uint32_t vaddr,nacl_arm_dec::Instruction inst,const nacl_arm_dec::ClassDecoder & decoder)291   DecodedInstruction(uint32_t vaddr, nacl_arm_dec::Instruction inst,
292                      const nacl_arm_dec::ClassDecoder& decoder)
293       // We eagerly compute both safety and defs here, because it turns out to
294       // be faster by 10% than doing either lazily and memoizing the result.
295       : vaddr_(vaddr),
296         inst_(inst),
297         decoder_(&decoder),
298         safety_(decoder.safety(inst_)),
299         defs_(decoder.defs(inst_))
300   {}
301 
addr()302   uint32_t addr() const { return vaddr_; }
303 
304   // 'this' dominates 'other', where 'this' is the instruction
305   // immediately preceding 'other': if 'other' executes, we can guarantee
306   // that 'this' was executed as well.
307 
308   // This is important if 'this' produces a sandboxed value that 'other'
309   // must consume.
310   //
311   // Note: If the conditions of the two instructions do
312   // not statically infer that the conditional execution is correct,
313   // we assume that it is not.
314   //
315   // Note that this function can't see the bundle size, so this result
316   // does not take it into account.  The SfiValidator reasons on this
317   // separately.
always_dominates(const DecodedInstruction & other)318   bool always_dominates(const DecodedInstruction& other) const {
319     nacl_arm_dec::Instruction::Condition cond1 = inst_.GetCondition();
320     nacl_arm_dec::Instruction::Condition cond2 = other.inst_.GetCondition();
321     return !defines(nacl_arm_dec::Register::Conditions()) &&
322         // TODO(jfb) Put back mixed-condition handling. See issue #3221.
323         //           SfiValidator::condition_implies[cond2][cond1];
324         ((cond1 == nacl_arm_dec::Instruction::AL) || (cond1 == cond2));
325   }
326 
327   // 'this' post-dominates 'other', where 'other' is the instruction
328   // immediately preceding 'this': if 'other' executes, we can guarantee
329   // that 'this' is executed as well.
330   //
331   // This is important if 'other' produces an unsafe value that 'this'
332   // fixes before it can leak out.
333   //
334   // Note: if the conditions of the two
335   // instructions do not statically infer that the conditional
336   // execution is correct, we assume that it is not.
337   //
338   // Note that this function can't see the bundle size, so this result
339   // does not take it into account.  The SfiValidator reasons on this
340   // separately.
always_postdominates(const DecodedInstruction & other)341   bool always_postdominates(const DecodedInstruction& other) const {
342     nacl_arm_dec::Instruction::Condition cond1 = other.inst_.GetCondition();
343     nacl_arm_dec::Instruction::Condition cond2 = inst_.GetCondition();
344     return !other.defines(nacl_arm_dec::Register::Conditions()) &&
345         // TODO(jfb) Put back mixed-condition handling. See issue #3221.
346         //           SfiValidator::condition_implies[cond1][cond2];
347         ((cond2 == nacl_arm_dec::Instruction::AL) || (cond1 == cond2));
348   }
349 
350   // Checks that the execution of 'this' is conditional on the test result
351   // (specifically, the Z flag being set) from 'other' -- which must be
352   // adjacent for this simple check to be meaningful.
is_eq_conditional_on(const DecodedInstruction & other)353   bool is_eq_conditional_on(const DecodedInstruction& other) const {
354     return inst_.GetCondition() == nacl_arm_dec::Instruction::EQ
355         && other.inst_.GetCondition() == nacl_arm_dec::Instruction::AL
356         && other.defines(nacl_arm_dec::Register::Conditions());
357   }
358 
359   // The methods below mirror those on ClassDecoder, but are cached and cheap.
safety()360   nacl_arm_dec::SafetyLevel safety() const { return safety_; }
defs()361   nacl_arm_dec::RegisterList defs() const { return defs_; }
362 
363   // The methods below pull values from ClassDecoder on demand.
is_relative_branch()364   bool is_relative_branch() const {
365     return decoder_->is_relative_branch(inst_);
366   }
367 
branch_target_register()368   const nacl_arm_dec::Register branch_target_register() const {
369     return decoder_->branch_target_register(inst_);
370   }
371 
is_literal_pool_head()372   bool is_literal_pool_head() const {
373     return decoder_->is_literal_pool_head(inst_);
374   }
375 
branch_target()376   uint32_t branch_target() const {
377     // branch_target_offset takes care of adding 8 to the instruction's
378     // immediate: the ARM manual states that "PC reads as the address of
379     // the current instruction plus 8".
380     return vaddr_ + decoder_->branch_target_offset(inst_);
381   }
382 
base_address_register()383   const nacl_arm_dec::Register base_address_register() const {
384     return decoder_->base_address_register(inst_);
385   }
386 
is_literal_load()387   bool is_literal_load() const {
388     return decoder_->is_literal_load(inst_);
389   }
390 
clears_bits(uint32_t mask)391   bool clears_bits(uint32_t mask) const {
392     return decoder_->clears_bits(inst_, mask);
393   }
394 
sets_Z_if_bits_clear(nacl_arm_dec::Register r,uint32_t mask)395   bool sets_Z_if_bits_clear(nacl_arm_dec::Register r, uint32_t mask) const {
396     return decoder_->sets_Z_if_bits_clear(inst_, r, mask);
397   }
398 
base_address_register_writeback_small_immediate()399   bool base_address_register_writeback_small_immediate() const {
400     return decoder_->base_address_register_writeback_small_immediate(inst_);
401   }
402 
is_load_thread_address_pointer()403   bool is_load_thread_address_pointer() const {
404     return decoder_->is_load_thread_address_pointer(inst_);
405   }
406 
407   // Some convenience methods, defined in terms of ClassDecoder:
defines(nacl_arm_dec::Register r)408   bool defines(nacl_arm_dec::Register r) const {
409     return defs().Contains(r);
410   }
411 
defines_any(nacl_arm_dec::RegisterList rl)412   bool defines_any(nacl_arm_dec::RegisterList rl) const {
413     return defs().ContainsAny(rl);
414   }
415 
defines_all(nacl_arm_dec::RegisterList rl)416   bool defines_all(nacl_arm_dec::RegisterList rl) const {
417     return defs().ContainsAll(rl);
418   }
419 
420   // Returns true if the instruction uses the given register.
uses(nacl_arm_dec::Register r)421   bool uses(nacl_arm_dec::Register r) const {
422      return decoder_->uses(inst_).Contains(r);
423   }
424 
inst()425   const nacl_arm_dec::Instruction& inst() const {
426     return inst_;
427   }
428 
Copy(const DecodedInstruction & other)429   DecodedInstruction& Copy(const DecodedInstruction& other) {
430     vaddr_ = other.vaddr_;
431     inst_.Copy(other.inst_);
432     decoder_ = other.decoder_;
433     safety_ = other.safety_;
434     defs_.Copy(other.defs_);
435     return *this;
436   }
437 
438  private:
439   uint32_t vaddr_;
440   nacl_arm_dec::Instruction inst_;
441   const nacl_arm_dec::ClassDecoder* decoder_;
442 
443   nacl_arm_dec::SafetyLevel safety_;
444   nacl_arm_dec::RegisterList defs_;
445 
446   NACL_DISALLOW_COPY_AND_ASSIGN(DecodedInstruction);
447 };
448 
449 // Describes a memory region that contains executable code.  Note that the code
450 // need not live in its final location -- we pretend the code lives at the
451 // provided start_addr, regardless of where the base pointer actually points.
452 class CodeSegment {
453  public:
CodeSegment(const uint8_t * base,uint32_t start_addr,size_t size)454   CodeSegment(const uint8_t* base, uint32_t start_addr, size_t size)
455       : base_(base),
456         start_addr_(start_addr),
457         size_(static_cast<uint32_t>(size)) {
458     CHECK(size <= std::numeric_limits<uint32_t>::max());
459     CHECK(start_addr <= std::numeric_limits<uint32_t>::max() - size_);
460   }
461 
begin_addr()462   uint32_t begin_addr() const { return start_addr_; }
end_addr()463   uint32_t end_addr() const { return start_addr_ + size_; }
size()464   uint32_t size() const { return size_; }
contains_address(uint32_t a)465   bool contains_address(uint32_t a) const {
466     return (a >= begin_addr()) && (a < end_addr());
467   }
468 
469   const nacl_arm_dec::Instruction operator[](uint32_t address) const {
470     const uint8_t* element = &base_[address - start_addr_];
471     return nacl_arm_dec::Instruction(
472         *reinterpret_cast<const uint32_t *>(element));
473   }
474 
475   bool operator<(const CodeSegment& other) const {
476     return start_addr_ < other.start_addr_;
477   }
478 
base()479   const uint8_t* base() const {
480     return base_;
481   }
482 
483  private:
484   const uint8_t* base_;
485   uint32_t start_addr_;
486   uint32_t size_;
487 };
488 
489 // A class that consumes reports of validation problems.
490 //
491 // Default implementation to be used with sel_ldr. All methods are
492 // just placeholders, so that code to generate diagnostics will link.
493 // If you want to generate error messages, use derived class ProblemReporter
494 // in problem_reporter.h
495 class ProblemSink {
496  public:
ProblemSink()497   ProblemSink() {}
~ProblemSink()498   virtual ~ProblemSink() {}
499 
500   // Helper function for reporting generic error messages using a
501   // printf style. How the corresponding data is used is left to
502   // the derived class.
503   //
504   // Arguments are:
505   //    violation - The type of violation being reported.
506   //    vaddr - The address of the instruction associated with the violation.
507   //    format - The format string to print out the corresponding diagnostic
508   //             message.
509   //     ... - Arguments to use with the format.
510   virtual void ReportProblemDiagnostic(nacl_arm_dec::Violation violation,
511                                        uint32_t vaddr,
512                                        const char* format, ...)
513                // Note: format is the 4th argument because of implicit this.
514                ATTRIBUTE_FORMAT_PRINTF(4, 5) = 0;
515 
516  private:
517   NACL_DISALLOW_COPY_AND_ASSIGN(ProblemSink);
518 };
519 
bundle_for_address(uint32_t address)520 const Bundle SfiValidator::bundle_for_address(uint32_t address) const {
521   uint32_t base = address & ~(bytes_per_bundle_ - 1);
522   return Bundle(base, bytes_per_bundle_);
523 }
524 
in_same_bundle(const DecodedInstruction & first,const DecodedInstruction & second)525 bool SfiValidator::in_same_bundle(const DecodedInstruction& first,
526                                   const DecodedInstruction& second) const {
527   return bundle_for_address(first.addr()) == bundle_for_address(second.addr());
528 }
529 
validate_instruction_pair_allowed(const DecodedInstruction & first,const DecodedInstruction & second,AddressSet * critical,nacl_arm_dec::Violation violation)530 nacl_arm_dec::ViolationSet SfiValidator::validate_instruction_pair_allowed(
531     const DecodedInstruction& first,
532     const DecodedInstruction& second,
533     AddressSet* critical,
534     nacl_arm_dec::Violation violation) const {
535   if (!in_same_bundle(first, second))
536     return nacl_arm_dec::ViolationBit(violation);
537   critical->add(second.addr());
538   return nacl_arm_dec::kNoViolations;
539 }
540 
541 }  // namespace nacl_arm_val
542 
543 #endif  // NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_V2_VALIDATOR_H
544