1 // Copyright 2015 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_WASM_DECODER_H_
6 #define V8_WASM_DECODER_H_
7 
8 #include <cinttypes>
9 #include <cstdarg>
10 #include <memory>
11 
12 #include "src/base/compiler-specific.h"
13 #include "src/base/memory.h"
14 #include "src/codegen/signature.h"
15 #include "src/flags/flags.h"
16 #include "src/utils/utils.h"
17 #include "src/utils/vector.h"
18 #include "src/wasm/wasm-result.h"
19 #include "src/zone/zone-containers.h"
20 
21 namespace v8 {
22 namespace internal {
23 namespace wasm {
24 
// Decoder tracing helpers. Both macros forward their variadic arguments to
// {PrintF}, and do so only while {FLAG_trace_wasm_decoder} is set.
//
// TRACE_IF additionally requires {cond} to hold; {cond} is evaluated only when
// the flag is set.
#define TRACE_IF(cond, ...)          \
  do {                               \
    if (FLAG_trace_wasm_decoder) {   \
      if (cond) PrintF(__VA_ARGS__); \
    }                                \
  } while (false)
// TRACE is the unconditional form of TRACE_IF.
#define TRACE(...) TRACE_IF(true, __VA_ARGS__)
33 
34 // A {DecodeResult} only stores the failure / success status, but no data.
35 using DecodeResult = VoidResult;
36 
37 // A helper utility to decode bytes, integers, fields, varints, etc, from
38 // a buffer of bytes.
39 class Decoder {
40  public:
41   enum ValidateFlag : bool { kValidate = true, kNoValidate = false };
42 
43   enum AdvancePCFlag : bool { kAdvancePc = true, kNoAdvancePc = false };
44 
45   enum TraceFlag : bool { kTrace = true, kNoTrace = false };
46 
47   Decoder(const byte* start, const byte* end, uint32_t buffer_offset = 0)
Decoder(start,start,end,buffer_offset)48       : Decoder(start, start, end, buffer_offset) {}
49   explicit Decoder(const Vector<const byte> bytes, uint32_t buffer_offset = 0)
50       : Decoder(bytes.begin(), bytes.begin() + bytes.length(), buffer_offset) {}
51   Decoder(const byte* start, const byte* pc, const byte* end,
52           uint32_t buffer_offset = 0)
start_(start)53       : start_(start), pc_(pc), end_(end), buffer_offset_(buffer_offset) {
54     DCHECK_LE(start, pc);
55     DCHECK_LE(pc, end);
56     DCHECK_EQ(static_cast<uint32_t>(end - start), end - start);
57   }
58 
59   virtual ~Decoder() = default;
60 
validate_size(const byte * pc,uint32_t length,const char * msg)61   inline bool validate_size(const byte* pc, uint32_t length, const char* msg) {
62     DCHECK_LE(start_, pc);
63     if (V8_UNLIKELY(pc > end_ || length > static_cast<uint32_t>(end_ - pc))) {
64       error(pc, msg);
65       return false;
66     }
67     return true;
68   }
69 
70   // Reads an 8-bit unsigned integer.
71   template <ValidateFlag validate>
72   inline uint8_t read_u8(const byte* pc, const char* msg = "expected 1 byte") {
73     return read_little_endian<uint8_t, validate>(pc, msg);
74   }
75 
76   // Reads a 16-bit unsigned integer (little endian).
77   template <ValidateFlag validate>
78   inline uint16_t read_u16(const byte* pc,
79                            const char* msg = "expected 2 bytes") {
80     return read_little_endian<uint16_t, validate>(pc, msg);
81   }
82 
83   // Reads a 32-bit unsigned integer (little endian).
84   template <ValidateFlag validate>
85   inline uint32_t read_u32(const byte* pc,
86                            const char* msg = "expected 4 bytes") {
87     return read_little_endian<uint32_t, validate>(pc, msg);
88   }
89 
90   // Reads a 64-bit unsigned integer (little endian).
91   template <ValidateFlag validate>
92   inline uint64_t read_u64(const byte* pc,
93                            const char* msg = "expected 8 bytes") {
94     return read_little_endian<uint64_t, validate>(pc, msg);
95   }
96 
97   // Reads a variable-length unsigned integer (little endian).
98   template <ValidateFlag validate>
99   uint32_t read_u32v(const byte* pc, uint32_t* length,
100                      const char* name = "LEB32") {
101     return read_leb<uint32_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
102                                                                 name);
103   }
104 
105   // Reads a variable-length signed integer (little endian).
106   template <ValidateFlag validate>
107   int32_t read_i32v(const byte* pc, uint32_t* length,
108                     const char* name = "signed LEB32") {
109     return read_leb<int32_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
110                                                                name);
111   }
112 
113   // Reads a variable-length unsigned integer (little endian).
114   template <ValidateFlag validate>
115   uint64_t read_u64v(const byte* pc, uint32_t* length,
116                      const char* name = "LEB64") {
117     return read_leb<uint64_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
118                                                                 name);
119   }
120 
121   // Reads a variable-length signed integer (little endian).
122   template <ValidateFlag validate>
123   int64_t read_i64v(const byte* pc, uint32_t* length,
124                     const char* name = "signed LEB64") {
125     return read_leb<int64_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
126                                                                name);
127   }
128 
129   // Reads a 8-bit unsigned integer (byte) and advances {pc_}.
130   uint8_t consume_u8(const char* name = "uint8_t") {
131     return consume_little_endian<uint8_t>(name);
132   }
133 
134   // Reads a 16-bit unsigned integer (little endian) and advances {pc_}.
135   uint16_t consume_u16(const char* name = "uint16_t") {
136     return consume_little_endian<uint16_t>(name);
137   }
138 
139   // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}.
140   uint32_t consume_u32(const char* name = "uint32_t") {
141     return consume_little_endian<uint32_t>(name);
142   }
143 
144   // Reads a LEB128 variable-length unsigned 32-bit integer and advances {pc_}.
145   uint32_t consume_u32v(const char* name = nullptr) {
146     uint32_t length = 0;
147     return read_leb<uint32_t, kValidate, kAdvancePc, kTrace>(pc_, &length,
148                                                              name);
149   }
150 
151   // Reads a LEB128 variable-length signed 32-bit integer and advances {pc_}.
152   int32_t consume_i32v(const char* name = nullptr) {
153     uint32_t length = 0;
154     return read_leb<int32_t, kValidate, kAdvancePc, kTrace>(pc_, &length, name);
155   }
156 
157   // Consume {size} bytes and send them to the bit bucket, advancing {pc_}.
158   void consume_bytes(uint32_t size, const char* name = "skip") {
159     // Only trace if the name is not null.
160     TRACE_IF(name, "  +%u  %-20s: %u bytes\n", pc_offset(), name, size);
161     if (checkAvailable(size)) {
162       pc_ += size;
163     } else {
164       pc_ = end_;
165     }
166   }
167 
168   // Check that at least {size} bytes exist between {pc_} and {end_}.
checkAvailable(uint32_t size)169   bool checkAvailable(uint32_t size) {
170     DCHECK_LE(pc_, end_);
171     if (V8_UNLIKELY(size > static_cast<uint32_t>(end_ - pc_))) {
172       errorf(pc_, "expected %u bytes, fell off end", size);
173       return false;
174     }
175     return true;
176   }
177 
178   // Do not inline error methods. This has measurable impact on validation time,
179   // see https://crbug.com/910432.
error(const char * msg)180   void V8_NOINLINE error(const char* msg) { errorf(pc_offset(), "%s", msg); }
error(const uint8_t * pc,const char * msg)181   void V8_NOINLINE error(const uint8_t* pc, const char* msg) {
182     errorf(pc_offset(pc), "%s", msg);
183   }
error(uint32_t offset,const char * msg)184   void V8_NOINLINE error(uint32_t offset, const char* msg) {
185     errorf(offset, "%s", msg);
186   }
187 
188   void V8_NOINLINE PRINTF_FORMAT(3, 4)
errorf(uint32_t offset,const char * format,...)189       errorf(uint32_t offset, const char* format, ...) {
190     va_list args;
191     va_start(args, format);
192     verrorf(offset, format, args);
193     va_end(args);
194   }
195 
196   void V8_NOINLINE PRINTF_FORMAT(3, 4)
errorf(const uint8_t * pc,const char * format,...)197       errorf(const uint8_t* pc, const char* format, ...) {
198     va_list args;
199     va_start(args, format);
200     verrorf(pc_offset(pc), format, args);
201     va_end(args);
202   }
203 
204   // Behavior triggered on first error, overridden in subclasses.
onFirstError()205   virtual void onFirstError() {}
206 
207   // Debugging helper to print a bytes range as hex bytes.
traceByteRange(const byte * start,const byte * end)208   void traceByteRange(const byte* start, const byte* end) {
209     DCHECK_LE(start, end);
210     for (const byte* p = start; p < end; ++p) TRACE("%02x ", *p);
211   }
212 
213   // Debugging helper to print bytes up to the end.
traceOffEnd()214   void traceOffEnd() {
215     traceByteRange(pc_, end_);
216     TRACE("<end>\n");
217   }
218 
219   // Converts the given value to a {Result}, copying the error if necessary.
220   template <typename T, typename U = typename std::remove_reference<T>::type>
toResult(T && val)221   Result<U> toResult(T&& val) {
222     if (failed()) {
223       TRACE("Result error: %s\n", error_.message().c_str());
224       return Result<U>{error_};
225     }
226     return Result<U>{std::forward<T>(val)};
227   }
228 
229   // Resets the boundaries of this decoder.
230   void Reset(const byte* start, const byte* end, uint32_t buffer_offset = 0) {
231     DCHECK_LE(start, end);
232     DCHECK_EQ(static_cast<uint32_t>(end - start), end - start);
233     start_ = start;
234     pc_ = start;
235     end_ = end;
236     buffer_offset_ = buffer_offset;
237     error_ = {};
238   }
239 
240   void Reset(Vector<const uint8_t> bytes, uint32_t buffer_offset = 0) {
241     Reset(bytes.begin(), bytes.end(), buffer_offset);
242   }
243 
ok()244   bool ok() const { return error_.empty(); }
failed()245   bool failed() const { return !ok(); }
more()246   bool more() const { return pc_ < end_; }
error()247   const WasmError& error() const { return error_; }
248 
start()249   const byte* start() const { return start_; }
pc()250   const byte* pc() const { return pc_; }
position()251   uint32_t V8_INLINE position() const {
252     return static_cast<uint32_t>(pc_ - start_);
253   }
254   // This needs to be inlined for performance (see https://crbug.com/910432).
pc_offset(const uint8_t * pc)255   uint32_t V8_INLINE pc_offset(const uint8_t* pc) const {
256     DCHECK_LE(start_, pc);
257     DCHECK_GE(kMaxUInt32 - buffer_offset_, pc - start_);
258     return static_cast<uint32_t>(pc - start_) + buffer_offset_;
259   }
pc_offset()260   uint32_t pc_offset() const { return pc_offset(pc_); }
buffer_offset()261   uint32_t buffer_offset() const { return buffer_offset_; }
262   // Takes an offset relative to the module start and returns an offset relative
263   // to the current buffer of the decoder.
GetBufferRelativeOffset(uint32_t offset)264   uint32_t GetBufferRelativeOffset(uint32_t offset) const {
265     DCHECK_LE(buffer_offset_, offset);
266     return offset - buffer_offset_;
267   }
end()268   const byte* end() const { return end_; }
set_end(const byte * end)269   void set_end(const byte* end) { end_ = end; }
270 
271   // Check if the byte at {offset} from the current pc equals {expected}.
lookahead(int offset,byte expected)272   bool lookahead(int offset, byte expected) {
273     DCHECK_LE(pc_, end_);
274     return end_ - pc_ > offset && pc_[offset] == expected;
275   }
276 
277  protected:
278   const byte* start_;
279   const byte* pc_;
280   const byte* end_;
281   // The offset of the current buffer in the module. Needed for streaming.
282   uint32_t buffer_offset_;
283   WasmError error_;
284 
285  private:
verrorf(uint32_t offset,const char * format,va_list args)286   void verrorf(uint32_t offset, const char* format, va_list args) {
287     // Only report the first error.
288     if (!ok()) return;
289     constexpr int kMaxErrorMsg = 256;
290     EmbeddedVector<char, kMaxErrorMsg> buffer;
291     int len = VSNPrintF(buffer, format, args);
292     CHECK_LT(0, len);
293     error_ = {offset, {buffer.begin(), static_cast<size_t>(len)}};
294     onFirstError();
295   }
296 
297   template <typename IntType, bool validate>
read_little_endian(const byte * pc,const char * msg)298   inline IntType read_little_endian(const byte* pc, const char* msg) {
299     if (!validate) {
300       DCHECK(validate_size(pc, sizeof(IntType), msg));
301     } else if (!validate_size(pc, sizeof(IntType), msg)) {
302       return IntType{0};
303     }
304     return base::ReadLittleEndianValue<IntType>(reinterpret_cast<Address>(pc));
305   }
306 
307   template <typename IntType>
consume_little_endian(const char * name)308   inline IntType consume_little_endian(const char* name) {
309     TRACE("  +%u  %-20s: ", pc_offset(), name);
310     if (!checkAvailable(sizeof(IntType))) {
311       traceOffEnd();
312       pc_ = end_;
313       return IntType{0};
314     }
315     IntType val = read_little_endian<IntType, false>(pc_, name);
316     traceByteRange(pc_, pc_ + sizeof(IntType));
317     TRACE("= %d\n", val);
318     pc_ += sizeof(IntType);
319     return val;
320   }
321 
322   template <typename IntType, ValidateFlag validate, AdvancePCFlag advance_pc,
323             TraceFlag trace>
324   inline IntType read_leb(const byte* pc, uint32_t* length,
325                           const char* name = "varint") {
326     DCHECK_IMPLIES(advance_pc, pc == pc_);
327     TRACE_IF(trace, "  +%u  %-20s: ", pc_offset(), name);
328     return read_leb_tail<IntType, validate, advance_pc, trace, 0>(pc, length,
329                                                                   name, 0);
330   }
331 
332   template <typename IntType, ValidateFlag validate, AdvancePCFlag advance_pc,
333             TraceFlag trace, int byte_index>
read_leb_tail(const byte * pc,uint32_t * length,const char * name,IntType result)334   IntType read_leb_tail(const byte* pc, uint32_t* length, const char* name,
335                         IntType result) {
336     constexpr bool is_signed = std::is_signed<IntType>::value;
337     constexpr int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
338     static_assert(byte_index < kMaxLength, "invalid template instantiation");
339     constexpr int shift = byte_index * 7;
340     constexpr bool is_last_byte = byte_index == kMaxLength - 1;
341     const bool at_end = validate && pc >= end_;
342     byte b = 0;
343     if (!at_end) {
344       DCHECK_LT(pc, end_);
345       b = *pc;
346       TRACE_IF(trace, "%02x ", b);
347       using Unsigned = typename std::make_unsigned<IntType>::type;
348       result = result |
349                (static_cast<Unsigned>(static_cast<IntType>(b) & 0x7f) << shift);
350     }
351     if (!is_last_byte && (b & 0x80)) {
352       // Make sure that we only instantiate the template for valid byte indexes.
353       // Compilers are not smart enough to figure out statically that the
354       // following call is unreachable if is_last_byte is false.
355       constexpr int next_byte_index = byte_index + (is_last_byte ? 0 : 1);
356       return read_leb_tail<IntType, validate, advance_pc, trace,
357                            next_byte_index>(pc + 1, length, name, result);
358     }
359     if (advance_pc) pc_ = pc + (at_end ? 0 : 1);
360     *length = byte_index + (at_end ? 0 : 1);
361     if (validate && (at_end || (b & 0x80))) {
362       TRACE_IF(trace, at_end ? "<end> " : "<length overflow> ");
363       errorf(pc, "expected %s", name);
364       result = 0;
365     }
366     if (is_last_byte) {
367       // A signed-LEB128 must sign-extend the final byte, excluding its
368       // most-significant bit; e.g. for a 32-bit LEB128:
369       //   kExtraBits = 4  (== 32 - (5-1) * 7)
370       // For unsigned values, the extra bits must be all zero.
371       // For signed values, the extra bits *plus* the most significant bit must
372       // either be 0, or all ones.
373       constexpr int kExtraBits = (sizeof(IntType) * 8) - ((kMaxLength - 1) * 7);
374       constexpr int kSignExtBits = kExtraBits - (is_signed ? 1 : 0);
375       const byte checked_bits = b & (0xFF << kSignExtBits);
376       constexpr byte kSignExtendedExtraBits = 0x7f & (0xFF << kSignExtBits);
377       bool valid_extra_bits =
378           checked_bits == 0 ||
379           (is_signed && checked_bits == kSignExtendedExtraBits);
380       if (!validate) {
381         DCHECK(valid_extra_bits);
382       } else if (!valid_extra_bits) {
383         error(pc, "extra bits in varint");
384         result = 0;
385       }
386     }
387     constexpr int sign_ext_shift =
388         is_signed ? Max(0, int{8 * sizeof(IntType)} - shift - 7) : 0;
389     // Perform sign extension.
390     result = (result << sign_ext_shift) >> sign_ext_shift;
391     if (trace && is_signed) {
392       TRACE("= %" PRIi64 "\n", static_cast<int64_t>(result));
393     } else if (trace) {
394       TRACE("= %" PRIu64 "\n", static_cast<uint64_t>(result));
395     }
396     return result;
397   }
398 };
399 
400 #undef TRACE
401 }  // namespace wasm
402 }  // namespace internal
403 }  // namespace v8
404 
405 #endif  // V8_WASM_DECODER_H_
406