1 /*
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7 #ifndef NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_V2_VALIDATOR_H
8 #define NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_V2_VALIDATOR_H
9
10 /*
11 * The SFI validator, and some utility classes it uses.
12 */
13
#include <stdlib.h>
#include <string.h>

#include <limits>
#include <vector>

#include "native_client/src/include/nacl_compiler_annotations.h"
#include "native_client/src/include/nacl_string.h"
#include "native_client/src/include/portability.h"
#include "native_client/src/shared/platform/nacl_check.h"
#include "native_client/src/trusted/cpu_features/arch/arm/cpu_arm.h"
#include "native_client/src/trusted/validator/ncvalidate.h"
#include "native_client/src/trusted/validator_arm/address_set.h"
#include "native_client/src/trusted/validator_arm/gen/arm32_decode.h"
#include "native_client/src/trusted/validator_arm/inst_classes.h"
#include "native_client/src/trusted/validator_arm/model.h"
28
29 namespace nacl_arm_val {
30
31 // Forward declarations of classes used by-reference in the validator, and
32 // defined at the end of this file.
33 class CodeSegment;
34 class DecodedInstruction;
35 class ProblemSink;
36
// Models one instruction bundle as a half-open address range
// [begin_addr(), end_addr()).  A bundle holds one or more instructions (two or
// more in the useful case); its size is whatever the SfiValidator, below, was
// configured with.
class Bundle {
 public:
  // virtual_base - address of the first byte of the bundle.
  // size_bytes   - length of the bundle, in bytes.
  Bundle(uint32_t virtual_base, uint32_t size_bytes)
      : virtual_base_(virtual_base),
        size_(size_bytes) {}

  // First address inside the bundle.
  uint32_t begin_addr() const {
    return virtual_base_;
  }

  // First address past the end of the bundle.
  uint32_t end_addr() const {
    return begin_addr() + size_;
  }

  // Two bundles compare equal when they start at the same address.  The size
  // is deliberately not compared: all Bundles are currently assumed to be the
  // same size.
  bool operator==(const Bundle& other) const {
    return begin_addr() == other.begin_addr();
  }

 private:
  uint32_t virtual_base_;
  uint32_t size_;
};
57
58 // The SFI validator itself. The validator is controlled by the following
59 // inputs:
60 // bytes_per_bundle - the number of bytes in each bundle of instructions.
61 // Currently this tends to be 16, but we've evaluated alternatives.
62 // Must be a power of two.
63 // code_region_bytes - number of bytes in the code region, starting at address
64 // 0 and including the trampolines, etc. Must be a power of two.
65 // data_region_bits - number of bytes in the data region, starting at address
66 // 0 and including the code region. Must be a power of two.
67 // read_only_registers - registers that untrusted code must not alter (but may
68 // read). This currently applies to r9, where we store some thread state.
69 // data_address_registers - registers that must contain a valid data-region
70 // address at all times. This currently applies to the stack pointer, but
71 // could be extended to include a frame pointer for C-like languages.
72 // cpu_features - the ARM CPU whose features should be considered during
73 // validation. This matters because some CPUs don't support some
74 // instructions, leak information or have erratas when others do not,
75 // yet we still want to emit performant code for the given target.
76 //
77 // The values of these inputs will typically be taken from the headers of
78 // untrusted code -- either by the ABI version they indicate, or (perhaps in
79 // the future) explicit indicators of what SFI model they follow.
80 class SfiValidator {
81 public:
82 SfiValidator(uint32_t bytes_per_bundle,
83 uint32_t code_region_bytes,
84 uint32_t data_region_bytes,
85 nacl_arm_dec::RegisterList read_only_registers,
86 nacl_arm_dec::RegisterList data_address_registers,
87 const NaClCPUFeaturesArm *cpu_features);
88
89 explicit SfiValidator(const SfiValidator& v);
90
91 // The main validator entry point. Validates the provided CodeSegments,
92 // which must be in sorted order, reporting any problems through the
93 // ProblemSink.
94 //
95 // Returns true iff no problems were found.
validate(const std::vector<CodeSegment> & segments,ProblemSink * out)96 bool validate(const std::vector<CodeSegment>& segments, ProblemSink* out) {
97 return find_violations(segments, out) == nacl_arm_dec::kNoViolations;
98 }
99
100 // Returns true if validation did not depend on the code's base address.
is_position_independent()101 bool is_position_independent() {
102 return is_position_independent_;
103 }
104
105 // Alternate validator entry point. Validates the provided
106 // CodeSegments, which must be in sorted order, reporting any
107 // problems through the ProblemSink.
108 //
109 // Returns the violation set of found violations. Note: if problem
110 // sink short ciruits the validation of all code (via method
111 // should_continue), this set may not contain all types of
112 // violations found. All that this method guarantees is if the code
113 // has validation violations, the returned set will be non-empty.
114 //
115 // Note: This version of validating is useful for testing, when one
116 // might want to know why the code did not validate.
117 nacl_arm_dec::ViolationSet find_violations(
118 const std::vector<CodeSegment>& segments, ProblemSink* out);
119
120 // Entry point for validation of dynamic code replacement. Allows
121 // micromodifications of dynamically generated code in form of
122 // constant updates for inline caches and similar VM techniques.
123 // Very minimal modifications allowed, essentially only immediate
124 // value update for MOV or ORR instruction.
125 // Returns true iff no problems were found.
126 bool ValidateSegmentPair(const CodeSegment& old_code,
127 const CodeSegment& new_code,
128 ProblemSink* out);
129
130 // Entry point for dynamic code creation. Copies code from
131 // source segment to destination, performing validation
132 // and accounting for need of safe handling of cases,
133 // where code being replaced is executed.
134 // Returns true iff no problems were found.
135 bool CopyCode(const CodeSegment& source_code,
136 CodeSegment* dest_code,
137 NaClCopyInstructionFunc copy_func,
138 ProblemSink* out);
139
140 // A 2-dimensional array, defined on the Condition of two
141 // instructions, defining when we can statically prove that the
142 // conditions of the first instruction implies the conditions of the
143 // second instruction.
144 //
145 // Note: The first index (i.e. row) corresponds to the condition of
146 // the first instruction, while the second index (i.e. column)
147 // corresponds to the condition of the second instruction.
148 //
149 // Note: The order the instructions execute is not important in
150 // this array. The context defines which instruction, of the
151 // instruction pair being compared, appears first.
152 //
153 // Note: The decoder should prevent UNCONDITIONAL (0b1111) from ever
154 // occurring, but we include entries for it out of paranoia, which also
155 // happens to make the table 16x16, which is easier to index into.
156 static const bool
157 condition_implies[nacl_arm_dec::Instruction::kConditionSize + 1]
158 [nacl_arm_dec::Instruction::kConditionSize + 1];
159
160 // Checks whether the given Register always holds a valid data region address.
161 // This implies that the register is safe to use in unguarded stores.
is_data_address_register(nacl_arm_dec::Register r)162 bool is_data_address_register(nacl_arm_dec::Register r) const {
163 return data_address_registers_.Contains(r);
164 }
165
166 // Number of A32 instructions per bundle.
InstructionsPerBundle()167 uint32_t InstructionsPerBundle() const {
168 return bytes_per_bundle_ / (nacl_arm_dec::kArm32InstSize / 8);
169 }
170
code_address_mask()171 uint32_t code_address_mask() const {
172 return ~(code_region_bytes_ - 1) | (bytes_per_bundle_ - 1);
173 }
data_address_mask()174 uint32_t data_address_mask() const {
175 return ~(data_region_bytes_ - 1);
176 }
177
read_only_registers()178 nacl_arm_dec::RegisterList read_only_registers() const {
179 return read_only_registers_;
180 }
data_address_registers()181 nacl_arm_dec::RegisterList data_address_registers() const {
182 return data_address_registers_;
183 }
184
CpuFeatures()185 const NaClCPUFeaturesArm *CpuFeatures() const {
186 return &cpu_features_;
187 }
188
conditional_memory_access_allowed_for_sfi()189 bool conditional_memory_access_allowed_for_sfi() const {
190 return NaClGetCPUFeatureArm(CpuFeatures(),
191 NaClCPUFeatureArm_CanUseTstMem) != 0;
192 }
193
194 // Utility function that applies the decoder of the validator.
decode(nacl_arm_dec::Instruction inst)195 const nacl_arm_dec::ClassDecoder& decode(
196 nacl_arm_dec::Instruction inst) const {
197 return decode_state_.decode(inst);
198 }
199
200 // Returns the Bundle containing a given address.
201 inline const Bundle bundle_for_address(uint32_t address) const;
202
203 // Returns true if both addresses are in the same bundle.
204 inline bool in_same_bundle(const DecodedInstruction& first,
205 const DecodedInstruction& second) const;
206
207 // Checks that both instructions can be in the same bundle,
208 // add updates the critical set to include the second instruction,
209 // since it can't be safely jumped to. If the instruction crosses
210 // a bundle, a set with the given violation will be returned.
211 inline nacl_arm_dec::ViolationSet validate_instruction_pair_allowed(
212 const DecodedInstruction& first,
213 const DecodedInstruction& second,
214 AddressSet* critical,
215 nacl_arm_dec::Violation violation) const;
216
217 // Copy the given validator state.
218 SfiValidator& operator=(const SfiValidator& v);
219
220 // Returns true if address is the first address of a bundle.
is_bundle_head(uint32_t address)221 bool is_bundle_head(uint32_t address) const {
222 return (address & (bytes_per_bundle_ - 1)) == 0;
223 }
224
225 // Returns true if address is on a valid inst boundary
226 // and is not within a pseudo instruction
227 bool is_valid_inst_boundary(const CodeSegment& code, uint32_t addr);
228
229 private:
230 // The SfiValidator constructor could have been given invalid values.
231 // Returns true the values were bad, and send the details to the ProblemSink.
232 // This method should be called from every public validation method.
233 bool ConstructionFailed(ProblemSink* out);
234
235 // Validates a straight-line execution of the code, applying patterns. This
236 // is the first validation pass, which fills out the AddressSets for
237 // consumption by later analyses.
238 // branches - gets filled in with the address of every direct branch.
239 // critical - gets filled in with every address that isn't safe to jump to,
240 // because it would split an otherwise-safe pseudo-op.
241 //
242 // Returns the violation set of found violations. Note: if problem
243 // sink short ciruits the validation of all code (via method
244 // should_continue), this set may not contain all types of
245 // violations found. All that this method guarantees is if the code
246 // has validation violations, the returned set will be non-empty.
247 nacl_arm_dec::ViolationSet validate_fallthrough(
248 const CodeSegment& segment, ProblemSink* out,
249 AddressSet* branches, AddressSet* critical);
250
251 // Validates all branches found by a previous pass, checking
252 // destinations. Returns the violation set of found branch
253 // violations. Note: if problem sink short ciruits the validation of
254 // all code (via method should_continue), this set may not contain
255 // all types of violations found. All that this method guarantees is
256 // if the code has validation violations, the returned set will be
257 // non-empty.
258 nacl_arm_dec::ViolationSet validate_branches(
259 const std::vector<CodeSegment>& segments,
260 const AddressSet& branches, const AddressSet& critical,
261 ProblemSink* out);
262
263 NaClCPUFeaturesArm cpu_features_;
264 uint32_t bytes_per_bundle_;
265 uint32_t code_region_bytes_;
266 uint32_t data_region_bytes_;
267 // Registers which cannot be modified by untrusted code.
268 nacl_arm_dec::RegisterList read_only_registers_;
269 // Registers which must always contain a valid data region address.
270 nacl_arm_dec::RegisterList data_address_registers_;
271 // Defines the decoder parser to use.
272 const nacl_arm_dec::Arm32DecoderState decode_state_;
273 // True if construction failed and further validation should be prevented.
274 bool construction_failed_;
275 // True if validation did not depend on the code's base address.
276 bool is_position_independent_;
277 };
278
279
280 // A facade that combines an Instruction with its address and a ClassDecoder.
281 // This makes the patterns substantially easier to write and read than managing
282 // all three variables separately.
283 //
284 // ClassDecoders do all decoding on-demand, with no caching. DecodedInstruction
285 // has knowledge of the validator, and pairs a ClassDecoder with a constant
286 // Instruction -- so it can cache commonly used values, and does. Caching
287 // safety and defs doubles validator performance. Add other values only
288 // under guidance of a profiler.
289 class DecodedInstruction {
290 public:
DecodedInstruction(uint32_t vaddr,nacl_arm_dec::Instruction inst,const nacl_arm_dec::ClassDecoder & decoder)291 DecodedInstruction(uint32_t vaddr, nacl_arm_dec::Instruction inst,
292 const nacl_arm_dec::ClassDecoder& decoder)
293 // We eagerly compute both safety and defs here, because it turns out to
294 // be faster by 10% than doing either lazily and memoizing the result.
295 : vaddr_(vaddr),
296 inst_(inst),
297 decoder_(&decoder),
298 safety_(decoder.safety(inst_)),
299 defs_(decoder.defs(inst_))
300 {}
301
addr()302 uint32_t addr() const { return vaddr_; }
303
304 // 'this' dominates 'other', where 'this' is the instruction
305 // immediately preceding 'other': if 'other' executes, we can guarantee
306 // that 'this' was executed as well.
307
308 // This is important if 'this' produces a sandboxed value that 'other'
309 // must consume.
310 //
311 // Note: If the conditions of the two instructions do
312 // not statically infer that the conditional execution is correct,
313 // we assume that it is not.
314 //
315 // Note that this function can't see the bundle size, so this result
316 // does not take it into account. The SfiValidator reasons on this
317 // separately.
always_dominates(const DecodedInstruction & other)318 bool always_dominates(const DecodedInstruction& other) const {
319 nacl_arm_dec::Instruction::Condition cond1 = inst_.GetCondition();
320 nacl_arm_dec::Instruction::Condition cond2 = other.inst_.GetCondition();
321 return !defines(nacl_arm_dec::Register::Conditions()) &&
322 // TODO(jfb) Put back mixed-condition handling. See issue #3221.
323 // SfiValidator::condition_implies[cond2][cond1];
324 ((cond1 == nacl_arm_dec::Instruction::AL) || (cond1 == cond2));
325 }
326
327 // 'this' post-dominates 'other', where 'other' is the instruction
328 // immediately preceding 'this': if 'other' executes, we can guarantee
329 // that 'this' is executed as well.
330 //
331 // This is important if 'other' produces an unsafe value that 'this'
332 // fixes before it can leak out.
333 //
334 // Note: if the conditions of the two
335 // instructions do not statically infer that the conditional
336 // execution is correct, we assume that it is not.
337 //
338 // Note that this function can't see the bundle size, so this result
339 // does not take it into account. The SfiValidator reasons on this
340 // separately.
always_postdominates(const DecodedInstruction & other)341 bool always_postdominates(const DecodedInstruction& other) const {
342 nacl_arm_dec::Instruction::Condition cond1 = other.inst_.GetCondition();
343 nacl_arm_dec::Instruction::Condition cond2 = inst_.GetCondition();
344 return !other.defines(nacl_arm_dec::Register::Conditions()) &&
345 // TODO(jfb) Put back mixed-condition handling. See issue #3221.
346 // SfiValidator::condition_implies[cond1][cond2];
347 ((cond2 == nacl_arm_dec::Instruction::AL) || (cond1 == cond2));
348 }
349
350 // Checks that the execution of 'this' is conditional on the test result
351 // (specifically, the Z flag being set) from 'other' -- which must be
352 // adjacent for this simple check to be meaningful.
is_eq_conditional_on(const DecodedInstruction & other)353 bool is_eq_conditional_on(const DecodedInstruction& other) const {
354 return inst_.GetCondition() == nacl_arm_dec::Instruction::EQ
355 && other.inst_.GetCondition() == nacl_arm_dec::Instruction::AL
356 && other.defines(nacl_arm_dec::Register::Conditions());
357 }
358
359 // The methods below mirror those on ClassDecoder, but are cached and cheap.
safety()360 nacl_arm_dec::SafetyLevel safety() const { return safety_; }
defs()361 nacl_arm_dec::RegisterList defs() const { return defs_; }
362
363 // The methods below pull values from ClassDecoder on demand.
is_relative_branch()364 bool is_relative_branch() const {
365 return decoder_->is_relative_branch(inst_);
366 }
367
branch_target_register()368 const nacl_arm_dec::Register branch_target_register() const {
369 return decoder_->branch_target_register(inst_);
370 }
371
is_literal_pool_head()372 bool is_literal_pool_head() const {
373 return decoder_->is_literal_pool_head(inst_);
374 }
375
branch_target()376 uint32_t branch_target() const {
377 // branch_target_offset takes care of adding 8 to the instruction's
378 // immediate: the ARM manual states that "PC reads as the address of
379 // the current instruction plus 8".
380 return vaddr_ + decoder_->branch_target_offset(inst_);
381 }
382
base_address_register()383 const nacl_arm_dec::Register base_address_register() const {
384 return decoder_->base_address_register(inst_);
385 }
386
is_literal_load()387 bool is_literal_load() const {
388 return decoder_->is_literal_load(inst_);
389 }
390
clears_bits(uint32_t mask)391 bool clears_bits(uint32_t mask) const {
392 return decoder_->clears_bits(inst_, mask);
393 }
394
sets_Z_if_bits_clear(nacl_arm_dec::Register r,uint32_t mask)395 bool sets_Z_if_bits_clear(nacl_arm_dec::Register r, uint32_t mask) const {
396 return decoder_->sets_Z_if_bits_clear(inst_, r, mask);
397 }
398
base_address_register_writeback_small_immediate()399 bool base_address_register_writeback_small_immediate() const {
400 return decoder_->base_address_register_writeback_small_immediate(inst_);
401 }
402
is_load_thread_address_pointer()403 bool is_load_thread_address_pointer() const {
404 return decoder_->is_load_thread_address_pointer(inst_);
405 }
406
407 // Some convenience methods, defined in terms of ClassDecoder:
defines(nacl_arm_dec::Register r)408 bool defines(nacl_arm_dec::Register r) const {
409 return defs().Contains(r);
410 }
411
defines_any(nacl_arm_dec::RegisterList rl)412 bool defines_any(nacl_arm_dec::RegisterList rl) const {
413 return defs().ContainsAny(rl);
414 }
415
defines_all(nacl_arm_dec::RegisterList rl)416 bool defines_all(nacl_arm_dec::RegisterList rl) const {
417 return defs().ContainsAll(rl);
418 }
419
420 // Returns true if the instruction uses the given register.
uses(nacl_arm_dec::Register r)421 bool uses(nacl_arm_dec::Register r) const {
422 return decoder_->uses(inst_).Contains(r);
423 }
424
inst()425 const nacl_arm_dec::Instruction& inst() const {
426 return inst_;
427 }
428
Copy(const DecodedInstruction & other)429 DecodedInstruction& Copy(const DecodedInstruction& other) {
430 vaddr_ = other.vaddr_;
431 inst_.Copy(other.inst_);
432 decoder_ = other.decoder_;
433 safety_ = other.safety_;
434 defs_.Copy(other.defs_);
435 return *this;
436 }
437
438 private:
439 uint32_t vaddr_;
440 nacl_arm_dec::Instruction inst_;
441 const nacl_arm_dec::ClassDecoder* decoder_;
442
443 nacl_arm_dec::SafetyLevel safety_;
444 nacl_arm_dec::RegisterList defs_;
445
446 NACL_DISALLOW_COPY_AND_ASSIGN(DecodedInstruction);
447 };
448
449 // Describes a memory region that contains executable code. Note that the code
450 // need not live in its final location -- we pretend the code lives at the
451 // provided start_addr, regardless of where the base pointer actually points.
452 class CodeSegment {
453 public:
CodeSegment(const uint8_t * base,uint32_t start_addr,size_t size)454 CodeSegment(const uint8_t* base, uint32_t start_addr, size_t size)
455 : base_(base),
456 start_addr_(start_addr),
457 size_(static_cast<uint32_t>(size)) {
458 CHECK(size <= std::numeric_limits<uint32_t>::max());
459 CHECK(start_addr <= std::numeric_limits<uint32_t>::max() - size_);
460 }
461
begin_addr()462 uint32_t begin_addr() const { return start_addr_; }
end_addr()463 uint32_t end_addr() const { return start_addr_ + size_; }
size()464 uint32_t size() const { return size_; }
contains_address(uint32_t a)465 bool contains_address(uint32_t a) const {
466 return (a >= begin_addr()) && (a < end_addr());
467 }
468
469 const nacl_arm_dec::Instruction operator[](uint32_t address) const {
470 const uint8_t* element = &base_[address - start_addr_];
471 return nacl_arm_dec::Instruction(
472 *reinterpret_cast<const uint32_t *>(element));
473 }
474
475 bool operator<(const CodeSegment& other) const {
476 return start_addr_ < other.start_addr_;
477 }
478
base()479 const uint8_t* base() const {
480 return base_;
481 }
482
483 private:
484 const uint8_t* base_;
485 uint32_t start_addr_;
486 uint32_t size_;
487 };
488
489 // A class that consumes reports of validation problems.
490 //
491 // Default implementation to be used with sel_ldr. All methods are
492 // just placeholders, so that code to generate diagnostics will link.
493 // If you want to generate error messages, use derived class ProblemReporter
494 // in problem_reporter.h
495 class ProblemSink {
496 public:
ProblemSink()497 ProblemSink() {}
~ProblemSink()498 virtual ~ProblemSink() {}
499
500 // Helper function for reporting generic error messages using a
501 // printf style. How the corresponding data is used is left to
502 // the derived class.
503 //
504 // Arguments are:
505 // violation - The type of violation being reported.
506 // vaddr - The address of the instruction associated with the violation.
507 // format - The format string to print out the corresponding diagnostic
508 // message.
509 // ... - Arguments to use with the format.
510 virtual void ReportProblemDiagnostic(nacl_arm_dec::Violation violation,
511 uint32_t vaddr,
512 const char* format, ...)
513 // Note: format is the 4th argument because of implicit this.
514 ATTRIBUTE_FORMAT_PRINTF(4, 5) = 0;
515
516 private:
517 NACL_DISALLOW_COPY_AND_ASSIGN(ProblemSink);
518 };
519
bundle_for_address(uint32_t address)520 const Bundle SfiValidator::bundle_for_address(uint32_t address) const {
521 uint32_t base = address & ~(bytes_per_bundle_ - 1);
522 return Bundle(base, bytes_per_bundle_);
523 }
524
in_same_bundle(const DecodedInstruction & first,const DecodedInstruction & second)525 bool SfiValidator::in_same_bundle(const DecodedInstruction& first,
526 const DecodedInstruction& second) const {
527 return bundle_for_address(first.addr()) == bundle_for_address(second.addr());
528 }
529
validate_instruction_pair_allowed(const DecodedInstruction & first,const DecodedInstruction & second,AddressSet * critical,nacl_arm_dec::Violation violation)530 nacl_arm_dec::ViolationSet SfiValidator::validate_instruction_pair_allowed(
531 const DecodedInstruction& first,
532 const DecodedInstruction& second,
533 AddressSet* critical,
534 nacl_arm_dec::Violation violation) const {
535 if (!in_same_bundle(first, second))
536 return nacl_arm_dec::ViolationBit(violation);
537 critical->add(second.addr());
538 return nacl_arm_dec::kNoViolations;
539 }
540
541 } // namespace nacl_arm_val
542
543 #endif // NATIVE_CLIENT_SRC_TRUSTED_VALIDATOR_ARM_V2_VALIDATOR_H
544