1 /*
2  * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_INST_CLASSES_H_
8 #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_INST_CLASSES_H_
9 
10 #include <climits>
11 
12 #include "native_client/src/include/nacl_macros.h"
13 #include "native_client/src/include/portability.h"
14 #include "native_client/src/trusted/validator_arm/model.h"
15 
16 /*
17  * Models the "instruction classes" that the decoder produces.
18  */
// Forward declarations of validator-side types.  Where this header uses them,
// it does so only through pointers and references, so their full definitions
// are not needed here.
namespace nacl_arm_val {
class AddressSet;
class SfiValidator;
class DecodedInstruction;
class ProblemSink;
class InstructionPairMatchData;
}  // namespace nacl_arm_val
26 
27 namespace nacl_arm_dec {
28 
// Classifies how safe a decoded instruction is and, when it is not safe, what
// the issue is.  Untrusted code may only contain instructions classified as
// MAY_BE_SAFE, and the validator may still reject those.
//
// Note: dgen_core.py (see class SafetyAction) uses these enumerated values.
// Update the values in that class whenever this list changes, so that the two
// stay in sync.
//
// Note: Every safety level other than MAY_BE_SAFE also acts as a violation
// (see enum Violation below), so any change made here must be mirrored in
// Violation.  MAY_BE_SAFE must remain the last entry of this enum, since code
// (elsewhere) assumes that it appears last in the list.
enum SafetyLevel {
  // The value of a SafetyLevel that was never set -- treat as unsafe.
  UNINITIALIZED = 0,

  // The ARMv7 ISA spec leaves the values put into one (or more) registers
  // unknown.  See instructions VSWP, VTRN, VUZP, and VZIP for examples.
  UNKNOWN = 1,
  // The ARMv7 ISA spec leaves this instruction undefined.
  UNDEFINED = 2,
  // The decoder functions do not recognize this instruction.
  NOT_IMPLEMENTED = 3,
  // The effects of this instruction at runtime are unpredictable.
  UNPREDICTABLE = 4,
  // ARMv7 deprecates this instruction.
  DEPRECATED = 5,

  // Our SFI model forbids this instruction.
  FORBIDDEN = 6,
  // Our SFI model forbids the operands of this instruction.
  FORBIDDEN_OPERANDS = 7,

  // The instruction should have decoded as a different instruction, so this
  // decoding is wrong.  This value should never occur, unless there is a bug
  // in our decoder tables (in file armv7.table).
  DECODER_ERROR = 8,

  // In isolation the instruction contains nothing scary, so it may be safe in
  // untrusted code; the validator may still overrule this during global
  // analysis.
  MAY_BE_SAFE = 9
};
73 
// Defines the set of validation violations that are found by the
// NaCl validator. Used to speed up generation of diagnostics, by only
// checking for corresponding found violations.
//
// The enumerated values double as bit positions in a ViolationSet (see
// ViolationBit below).
enum Violation {
  // Note: Each (unsafe) safety level also corresponds to a violation. The
  // following violations capture these unsafe safety levels.
  // Note: Be sure to initialize each of them with the corresponding
  // SafetyLevel entry, so that code can assume the safety violation has the
  // same value as the safety level.
  UNINITIALIZED_VIOLATION = UNINITIALIZED,
  UNKNOWN_VIOLATION = UNKNOWN,
  UNDEFINED_VIOLATION = UNDEFINED,
  NOT_IMPLEMENTED_VIOLATION = NOT_IMPLEMENTED,
  UNPREDICTABLE_VIOLATION = UNPREDICTABLE,
  DEPRECATED_VIOLATION = DEPRECATED,
  FORBIDDEN_VIOLATION = FORBIDDEN,
  FORBIDDEN_OPERANDS_VIOLATION = FORBIDDEN_OPERANDS,
  DECODER_ERROR_VIOLATION = DECODER_ERROR,
  // Note: The next enumerated value is intentionally set to
  // MAY_BE_SAFE, to guarantee that all remaining violations do not
  // overlap safety violations.
  //
  // Reports that the load/store uses an unsafe base address.  A base address is
  // safe if it
  //     1. Has specific bits masked off by its immediate predecessor, or
  //     2. Is predicated on those bits being clear, as tested by its immediate
  //        predecessor, or
  //     3. Is in a register defined as always containing a safe address.
  // Note: Predication checks (in 2) may be disabled on some architectures.
  LOADSTORE_VIOLATION = MAY_BE_SAFE,
  // Reports that the load/store uses a safe base address, but violates the
  // condition that the instruction pair can't cross a bundle boundary.
  LOADSTORE_CROSSES_BUNDLE_VIOLATION,
  // Reports that the indirect branch uses an unsafe destination address.  A
  // destination address is safe if it has specific bits masked off by its
  // immediate predecessor.
  BRANCH_MASK_VIOLATION,
  // Reports that the indirect branch uses a safe destination address, but
  // violates the condition that the instruction pair can't cross a bundle
  // boundary.
  BRANCH_MASK_CROSSES_BUNDLE_VIOLATION,
  // Reports that the instruction updates a data-address register, but isn't
  // immediately followed by a mask.
  DATA_REGISTER_UPDATE_VIOLATION,
  // Reports that the instruction safely updates a data-address register, but
  // violates the condition that the instruction pair can't cross a bundle
  // boundary.
  //
  // This isn't strictly needed for security. The second instruction (i.e. the
  // mask) can be run without running the first instruction. Further, if
  // the first instruction is run, we can still guarantee that the second will
  // be run as well. However, for simplicity, the current validator assumes
  // that all instruction pairs must be atomic.
  DATA_REGISTER_UPDATE_CROSSES_BUNDLE_VIOLATION,
  // Reports that the call instruction isn't the last instruction in
  // the bundle.
  //
  // This is not a security check per se. Rather, it is a check to prevent
  // imbalancing the CPU's return stack, thereby decreasing performance.
  CALL_POSITION_VIOLATION,
  // Reports that the instruction sets a read-only register.
  READ_ONLY_VIOLATION,
  // Reports that the instruction reads the thread local pointer.
  READ_THREAD_LOCAL_POINTER_VIOLATION,
  // Reports that the program counter was updated without using one of the
  // approved branch instructions.
  PC_WRITES_VIOLATION,
  // Reports a branch that branches into the middle of a multiple instruction
  // pseudo-operation.
  BRANCH_SPLITS_PATTERN_VIOLATION,
  // Reports a branch to an instruction outside the code segment.
  BRANCH_OUT_OF_RANGE_VIOLATION,
  // Any other type of violation. Must appear last in the enumeration.
  // Value is used in testing to guarantee that the corresponding
  // bitset ViolationSet can hold all validation violations.
  OTHER_VIOLATION
};
151 
// Defines the bitset of found validation violations. The bit for a violation
// (or unsafe safety level) is the one at the position given by its enumerated
// value; see ViolationBit and SafetyViolationBit.
typedef uint32_t ViolationSet;

// Defines the notion of an empty violation set.
static const ViolationSet kNoViolations = 0x0;
157 
158 // Returns true if a safety violation.
IsSafetyViolation(Violation v)159 inline bool IsSafetyViolation(Violation v) {
160   return (static_cast<int>(v) >= 0) && (static_cast<int>(v) < MAY_BE_SAFE);
161 }
162 
163 // Converts a safety level to the corresponding bit in the violation set.
SafetyViolationBit(SafetyLevel level)164 inline ViolationSet SafetyViolationBit(SafetyLevel level) {
165   return static_cast<ViolationSet>(0x1) << level;
166 }
167 
168 // Converts a validation violation to a ViolationSet containing
169 // the corresponding validation violation.
ViolationBit(Violation violation)170 inline ViolationSet ViolationBit(
171     Violation violation) {
172   NACL_COMPILE_TIME_ASSERT(static_cast<size_t>(OTHER_VIOLATION) <
173                            sizeof(ViolationSet) * CHAR_BIT);
174   return static_cast<ViolationSet>(0x1) << violation;
175 }
176 
// Defines the set of all safety violations: every bit below the bit that
// corresponds to MAY_BE_SAFE.
// Note: Assumes that MAY_BE_SAFE bounds the range of safety violations,
// i.e. that every unsafe safety level has a smaller value than MAY_BE_SAFE.
static const ViolationSet kSafetyViolations =
  SafetyViolationBit(MAY_BE_SAFE) - 1;
182 
// Defines the set of violations that cross bundle boundaries, i.e. the
// load/store, branch mask, and data register update variants of the
// "crosses bundle" violation.
static const ViolationSet kCrossesBundleViolations =
  ViolationBit(LOADSTORE_CROSSES_BUNDLE_VIOLATION) |
  ViolationBit(BRANCH_MASK_CROSSES_BUNDLE_VIOLATION) |
  ViolationBit(DATA_REGISTER_UPDATE_CROSSES_BUNDLE_VIOLATION);
188 
189 // Returns the union of the two validation violation sets.
ViolationUnion(ViolationSet vset1,ViolationSet vset2)190 inline ViolationSet ViolationUnion(ViolationSet vset1, ViolationSet vset2) {
191   return vset1 | vset2;
192 }
193 
194 // Returns the intersection of two validation violation sets.
ViolationIntersect(ViolationSet vset1,ViolationSet vset2)195 inline ViolationSet ViolationIntersect(ViolationSet vset1,
196                                        ViolationSet vset2) {
197   return vset1 & vset2;
198 }
199 
200 // Returns true if the given validation violation set contains the
201 // given violation.
ContainsViolation(ViolationSet vset,Violation violation)202 inline bool ContainsViolation(ViolationSet vset, Violation violation) {
203   return ViolationIntersect(vset, ViolationBit(violation)) != kNoViolations;
204 }
205 
206 // Returns true if the violation set contains a safety violation.
ContainsSafetyViolations(ViolationSet vset)207 inline bool ContainsSafetyViolations(ViolationSet vset) {
208   return ViolationIntersect(vset, kSafetyViolations) != kNoViolations;
209 }
210 
211 // Returns true if the violation set contains a violation that crosses
212 // bundle boundaries.
ContainsCrossesBundleViolation(ViolationSet vset)213 inline bool ContainsCrossesBundleViolation(ViolationSet vset) {
214   return ViolationIntersect(vset, kCrossesBundleViolations) != kNoViolations;
215 }
216 
217 // Returns true if the violation is a violation that crosses
218 // bundle boundaries.
IsCrossesBundleViolation(Violation violation)219 inline bool IsCrossesBundleViolation(Violation violation) {
220   return ContainsCrossesBundleViolation(ViolationBit(violation));
221 }
222 
// A class decoder is designed to decode a set of instructions that
// have the same semantics, in terms of what the validator needs. This
// includes the bit ranges in the instruction that correspond to
// assigned registers, as well as whether the instruction is safe to
// use within the validator.
//
// The important property of these class decoders is that the
// corresponding DecoderState (defined in decoder.h) will inspect the
// instruction bits and then dispatch the appropriate class decoder.
//
// The virtuals defined in this class are intended to be used solely
// for the purpose of the validator. For example, for virtual "defs",
// the class decoder will look at the bits defining the assigned
// register of the instruction (typically in bits 12 through 15) and
// add that register to the set of registers returned by the "defs"
// virtual.
//
// There is an underlying assumption that class decoders are constant
// and only provide implementation details for the instructions they
// should be applied to. In general, class decoders should not be
// copied or assigned. Hence, only a no-argument constructor should be
// provided.
class ClassDecoder {
 public:
  // Checks how safe this instruction is, in isolation.
  // This will detect any violation in the ARMv7 spec -- undefined encodings,
  // use of registers that are unpredictable -- and the most basic constraints
  // in our SFI model.  Because ClassDecoders are referentially-transparent and
  // cannot touch global state, this will not check things that may vary with
  // ABI version.
  //
  // Note: To best take advantage of the testing system, define this function
  // to return DECODER_ERROR immediately, if DECODER_ERROR is to be returned by
  // this virtual. This allows testing to (quietly) detect when it is
  // ok that the expected decoder wasn't the actual decoder selected by the
  // instruction decoder.
  //
  // The most positive result this can return is called MAY_BE_SAFE because it
  // is necessary, but not sufficient: the validator has the final say.
  //
  // This is the only pure virtual of the class; every derived class decoder
  // must define it.
  virtual SafetyLevel safety(Instruction i) const = 0;

  // Gets the set of registers affected when an instruction executes.  This set
  // is complete, and includes
  //  - explicit destination (general purpose) register(s),
  //  - changes to condition APSR flags NZCV,
  //  - indexed-addressing writeback,
  //  - changes to the program counter by branches,
  //  - implicit register results, like branch-with-link.
  //
  // Note: This virtual only tracks effects to ARM general purpose registers,
  // and NZCV APSR flags.
  //
  // Note: If you are unsure if an instruction changes condition flags, be
  // conservative and add it to the set of registers returned by this
  // function. Failure to do so may cause a potential break in pattern
  // atomicity, which checks that two instructions run under the same condition.
  //
  // The default implementation returns a ridiculous bitmask that suggests that
  // all possible side effects will occur -- override if this is not
  // appropriate. :-)
  virtual RegisterList defs(Instruction i) const;

  // Gets the set of general purpose registers used by the instruction.
  // This set includes:
  //  - explicit source (general purpose) register(s),
  //  - implicit registers, like branch-with-link.
  //
  // The default implementation returns the empty set.
  virtual RegisterList uses(Instruction i) const;

  // Returns true if the base register has small immediate writeback.
  //
  // This distinction is useful for operations like SP-relative loads, because
  // the maximum displacement that immediate addressing can produce is small and
  // will therefore never cross guard pages if the base register isn't
  // constrained to the untrusted address space.
  //
  // Note that this does not include writeback produced by *register* indexed
  // addressing writeback, since they have no useful properties in our model.
  //
  // Stubbed to indicate that no such writeback occurs.
  virtual bool base_address_register_writeback_small_immediate(
      Instruction i) const;

  // For instructions that can read or write memory, gets the register used as
  // the base for generating the effective address.
  //
  // It is stubbed to return nonsense.
  virtual Register base_address_register(Instruction i) const;

  // Checks whether the instruction is a PC-relative load + immediate.
  //
  // It is stubbed to return false.
  virtual bool is_literal_load(Instruction i) const;

  // For indirect branch instructions, returns the register being moved into
  // r15.  Otherwise, reports Register::None().
  //
  // Note that this exclusively describes instructions that write r15 from a
  // register, unmodified.  This means BX, BLX, and MOV without shift.  Not
  // even BIC, which we allow to write to r15, is modeled this way.
  //
  virtual Register branch_target_register(Instruction i) const;

  // Checks whether the instruction is a direct relative branch -- meaning it
  // adds a constant offset to r15.
  virtual bool is_relative_branch(Instruction i) const;

  // For relative branches, gets the offset added to the instruction's
  // virtual address to find the target.  The results are bogus unless
  // is_relative_branch() returns true.
  //
  // Note that this is different than the offset added to r15 at runtime, since
  // r15 reads as 8 bytes ahead.  This function does the math so you don't have
  // to.
  virtual int32_t branch_target_offset(Instruction i) const;

  // Checks whether this instruction is the special bit sequence that marks
  // the start of a literal pool.
  virtual bool is_literal_pool_head(Instruction i) const;

  // Checks that an instruction clears a certain pattern of bits in all its
  // (non-flag) result registers.  The mask should include 1s in the positions
  // that should be cleared.
  virtual bool clears_bits(Instruction i, uint32_t mask) const;

  // Checks that an instruction will set Z if certain bits in r (chosen by 1s in
  // the mask) are clear.
  //
  // Note that the inverse does not hold: the actual instruction i may require
  // *more* bits to be clear to set Z.  This is fine.
  virtual bool sets_Z_if_bits_clear(Instruction i,
                                    Register r,
                                    uint32_t mask) const;

  // Returns true only if the given thread register (r9) is used in one of
  // the following forms:
  //    ldr Rn, [r9]     ; load use thread pointer.
  //    ldr Rn, [r9, #4] ; load IRT thread pointer.
  // That is, accesses one of the two legal thread pointers.
  //
  // The default virtual returns false.
  virtual bool is_load_thread_address_pointer(Instruction i) const;

  // Returns the sentinel version of the instruction for dynamic code
  // replacement. In dynamic code replacement, only certain immediate
  // constants for specialized instructions may be modified by a dynamic
  // code replacement. For such instructions, this method returns the
  // instruction with the immediate constant normalized to zero. For
  // all other instructions, this method returns a copy of the instruction.
  //
  // This result is used by method ValidateSegmentPair in validator.cc to
  // verify that only such constant changes are allowed.
  //
  // Note: This method should not be defined if any of the following
  // virtuals are overridden by the decoder class, since they make assumptions
  // about the literal constants within them:
  //     offset_is_immediate
  //     is_relative_branch
  //     branch_target_offset
  //     is_literal_pool_head
  //     clears_bits
  //     sets_Z_if_bits_clear
  // NOTE(review): offset_is_immediate is not declared in this class; the
  // entry may be stale -- confirm.
  virtual Instruction dynamic_code_replacement_sentinel(Instruction i) const;

  // Checks the given pair of instructions, and returns found validation
  // violations. Called with the class decoder associated with the second
  // instruction. For violations that only look at a single instruction,
  // they are assumed to apply to the second instruction in the pair.
  //
  // As a side effect, if the instructions are found not to include any
  // violations, but affect state of the validation, the corresponding
  // updates of the validation state are done.
  //
  //   first: The first instruction in the instruction pair to be validated.
  //   second: The second instruction in the instruction pair to be validated.
  //   sfi: The validator being used.
  //   branches: gets filled in with the address of every direct branch.
  //   critical: gets filled in with every address that isn't safe to jump to,
  //       because it would split an otherwise-safe pseudo-op, or jumps into
  //       the middle of a constant pool.
  //   next_inst_addr: The address of the next instruction to be validated.
  //       Set by the caller to the address of the instruction immediately
  //       following the second instruction. If additional instructions should
  //       be skipped (as with constant pool heads), this value should be
  //       updated to point to the next instruction to be processed.
  //
  // Returns the validation violations found.
  virtual ViolationSet get_violations(
      const nacl_arm_val::DecodedInstruction& first,
      const nacl_arm_val::DecodedInstruction& second,
      const nacl_arm_val::SfiValidator& sfi,
      nacl_arm_val::AddressSet* branches,
      nacl_arm_val::AddressSet* critical,
      uint32_t* next_inst_addr) const;

  // Generates diagnostic messages for validation violations found
  // for the instruction pair. Called with the class decoder associated
  // with the second instruction. Assumes it is only called if virtual
  // get_violations returned a non-empty set.
  //
  //   violations: The set of validation violations detected by get_violations.
  //   first: The first instruction in the instruction pair to be validated.
  //   second: The second instruction in the instruction pair to be validated.
  //   sfi: The validator being used.
  //   out: The problem reporter to use to report diagnostics.
  virtual void generate_diagnostics(
      ViolationSet violations,
      const nacl_arm_val::DecodedInstruction& first,
      const nacl_arm_val::DecodedInstruction& second,
      const nacl_arm_val::SfiValidator& sfi,
      nacl_arm_val::ProblemSink* out) const;

 protected:
  // Construction and destruction are protected, so class decoders can only be
  // created and destroyed through derived classes.
  ClassDecoder() {}
  virtual ~ClassDecoder() {}

 private:
  // Class decoders should not be copied or assigned (see the class comment).
  NACL_DISALLOW_COPY_AND_ASSIGN(ClassDecoder);
};
443 
444 }  // namespace nacl_arm_dec
445 
446 #endif  // NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_INST_CLASSES_H_
447