1 // Copyright 2015 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_WASM_DECODER_H_ 6 #define V8_WASM_DECODER_H_ 7 8 #include <cinttypes> 9 #include <cstdarg> 10 #include <memory> 11 12 #include "src/base/compiler-specific.h" 13 #include "src/base/memory.h" 14 #include "src/codegen/signature.h" 15 #include "src/flags/flags.h" 16 #include "src/utils/vector.h" 17 #include "src/wasm/wasm-opcodes.h" 18 #include "src/wasm/wasm-result.h" 19 #include "src/zone/zone-containers.h" 20 21 namespace v8 { 22 namespace internal { 23 namespace wasm { 24 25 #define TRACE(...) \ 26 do { \ 27 if (FLAG_trace_wasm_decoder) PrintF(__VA_ARGS__); \ 28 } while (false) 29 #define TRACE_IF(cond, ...) \ 30 do { \ 31 if (FLAG_trace_wasm_decoder && (cond)) PrintF(__VA_ARGS__); \ 32 } while (false) 33 34 // A {DecodeResult} only stores the failure / success status, but no data. 35 using DecodeResult = VoidResult; 36 37 // A helper utility to decode bytes, integers, fields, varints, etc, from 38 // a buffer of bytes. 39 class Decoder { 40 public: 41 // {ValidateFlag} can be used in a boolean manner ({if (!validate) ...}). 42 enum ValidateFlag : int8_t { 43 kNoValidation = 0, // Don't run validation, assume valid input. 44 kBooleanValidation, // Run validation but only store a generic error. 45 kFullValidation // Run full validation with error message and location. 46 }; 47 48 enum TraceFlag : bool { kTrace = true, kNoTrace = false }; 49 50 Decoder(const byte* start, const byte* end, uint32_t buffer_offset = 0) Decoder(start,start,end,buffer_offset)51 : Decoder(start, start, end, buffer_offset) {} 52 explicit Decoder(const Vector<const byte> bytes, uint32_t buffer_offset = 0) 53 : Decoder(bytes.begin(), bytes.begin() + bytes.length(), buffer_offset) {} 54 Decoder(const byte* start, const byte* pc, const byte* end, 55 uint32_t buffer_offset = 0) start_(start)56 : start_(start), pc_(pc), end_(end), buffer_offset_(buffer_offset) { 57 DCHECK_LE(start, pc); 58 DCHECK_LE(pc, end); 59 DCHECK_EQ(static_cast<uint32_t>(end - start), end - start); 60 } 61 62 virtual ~Decoder() = default; 63 validate_size(const byte * pc,uint32_t length,const char * msg)64 bool validate_size(const byte* pc, uint32_t length, const char* msg) { 65 DCHECK_LE(start_, pc); 66 if (V8_UNLIKELY(pc > end_ || length > static_cast<uint32_t>(end_ - pc))) { 67 error(pc, msg); 68 return false; 69 } 70 return true; 71 } 72 73 // Reads an 8-bit unsigned integer. 74 template <ValidateFlag validate> 75 uint8_t read_u8(const byte* pc, const char* msg = "expected 1 byte") { 76 return read_little_endian<uint8_t, validate>(pc, msg); 77 } 78 79 // Reads a 16-bit unsigned integer (little endian). 80 template <ValidateFlag validate> 81 uint16_t read_u16(const byte* pc, const char* msg = "expected 2 bytes") { 82 return read_little_endian<uint16_t, validate>(pc, msg); 83 } 84 85 // Reads a 32-bit unsigned integer (little endian). 86 template <ValidateFlag validate> 87 uint32_t read_u32(const byte* pc, const char* msg = "expected 4 bytes") { 88 return read_little_endian<uint32_t, validate>(pc, msg); 89 } 90 91 // Reads a 64-bit unsigned integer (little endian). 92 template <ValidateFlag validate> 93 uint64_t read_u64(const byte* pc, const char* msg = "expected 8 bytes") { 94 return read_little_endian<uint64_t, validate>(pc, msg); 95 } 96 97 // Reads a variable-length unsigned integer (little endian). 98 template <ValidateFlag validate> 99 uint32_t read_u32v(const byte* pc, uint32_t* length, 100 const char* name = "LEB32") { 101 return read_leb<uint32_t, validate, kNoTrace>(pc, length, name); 102 } 103 104 // Reads a variable-length signed integer (little endian). 105 template <ValidateFlag validate> 106 int32_t read_i32v(const byte* pc, uint32_t* length, 107 const char* name = "signed LEB32") { 108 return read_leb<int32_t, validate, kNoTrace>(pc, length, name); 109 } 110 111 // Reads a variable-length unsigned integer (little endian). 112 template <ValidateFlag validate> 113 uint64_t read_u64v(const byte* pc, uint32_t* length, 114 const char* name = "LEB64") { 115 return read_leb<uint64_t, validate, kNoTrace>(pc, length, name); 116 } 117 118 // Reads a variable-length signed integer (little endian). 119 template <ValidateFlag validate> 120 int64_t read_i64v(const byte* pc, uint32_t* length, 121 const char* name = "signed LEB64") { 122 return read_leb<int64_t, validate, kNoTrace>(pc, length, name); 123 } 124 125 // Reads a variable-length 33-bit signed integer (little endian). 126 template <ValidateFlag validate> 127 int64_t read_i33v(const byte* pc, uint32_t* length, 128 const char* name = "signed LEB33") { 129 return read_leb<int64_t, validate, kNoTrace, 33>(pc, length, name); 130 } 131 132 // Convenient overload for callers who don't care about length. 133 template <ValidateFlag validate> read_prefixed_opcode(const byte * pc)134 WasmOpcode read_prefixed_opcode(const byte* pc) { 135 uint32_t len; 136 return read_prefixed_opcode<validate>(pc, &len); 137 } 138 139 // Reads a prefixed-opcode, possibly with variable-length index. 140 // `length` is set to the number of bytes that make up this opcode, 141 // *including* the prefix byte. For most opcodes, it will be 2. 142 template <ValidateFlag validate> 143 WasmOpcode read_prefixed_opcode(const byte* pc, uint32_t* length, 144 const char* name = "prefixed opcode") { 145 uint32_t index; 146 147 // Prefixed opcodes all use LEB128 encoding. 148 index = read_u32v<validate>(pc + 1, length, "prefixed opcode index"); 149 *length += 1; // Prefix byte. 150 // Only support opcodes that go up to 0xFF (when decoded). Anything 151 // bigger will need 1 more byte, and the '<< 8' below will be wrong. 152 if (validate && V8_UNLIKELY(index > 0xff)) { 153 errorf(pc, "Invalid prefixed opcode %d", index); 154 // If size validation fails. 155 index = 0; 156 *length = 0; 157 } 158 159 return static_cast<WasmOpcode>((*pc) << 8 | index); 160 } 161 162 // Reads a 8-bit unsigned integer (byte) and advances {pc_}. 163 uint8_t consume_u8(const char* name = "uint8_t") { 164 return consume_little_endian<uint8_t>(name); 165 } 166 167 // Reads a 16-bit unsigned integer (little endian) and advances {pc_}. 168 uint16_t consume_u16(const char* name = "uint16_t") { 169 return consume_little_endian<uint16_t>(name); 170 } 171 172 // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}. 173 uint32_t consume_u32(const char* name = "uint32_t") { 174 return consume_little_endian<uint32_t>(name); 175 } 176 177 // Reads a LEB128 variable-length unsigned 32-bit integer and advances {pc_}. 178 uint32_t consume_u32v(const char* name = nullptr) { 179 uint32_t length = 0; 180 uint32_t result = 181 read_leb<uint32_t, kFullValidation, kTrace>(pc_, &length, name); 182 pc_ += length; 183 return result; 184 } 185 186 // Reads a LEB128 variable-length signed 32-bit integer and advances {pc_}. 187 int32_t consume_i32v(const char* name = nullptr) { 188 uint32_t length = 0; 189 int32_t result = 190 read_leb<int32_t, kFullValidation, kTrace>(pc_, &length, name); 191 pc_ += length; 192 return result; 193 } 194 195 // Reads a LEB128 variable-length unsigned 64-bit integer and advances {pc_}. 196 uint64_t consume_u64v(const char* name = nullptr) { 197 uint32_t length = 0; 198 uint64_t result = 199 read_leb<uint64_t, kFullValidation, kTrace>(pc_, &length, name); 200 pc_ += length; 201 return result; 202 } 203 204 // Consume {size} bytes and send them to the bit bucket, advancing {pc_}. 205 void consume_bytes(uint32_t size, const char* name = "skip") { 206 // Only trace if the name is not null. 207 TRACE_IF(name, " +%u %-20s: %u bytes\n", pc_offset(), name, size); 208 if (checkAvailable(size)) { 209 pc_ += size; 210 } else { 211 pc_ = end_; 212 } 213 } 214 215 // Check that at least {size} bytes exist between {pc_} and {end_}. checkAvailable(uint32_t size)216 bool checkAvailable(uint32_t size) { 217 DCHECK_LE(pc_, end_); 218 if (V8_UNLIKELY(size > static_cast<uint32_t>(end_ - pc_))) { 219 errorf(pc_, "expected %u bytes, fell off end", size); 220 return false; 221 } 222 return true; 223 } 224 225 // Use this for "boolean validation", i.e. if the error message is not used 226 // anyway. MarkError()227 void V8_NOINLINE MarkError() { 228 if (!ok()) return; 229 error_ = {0, "validation failed"}; 230 onFirstError(); 231 } 232 233 // Do not inline error methods. This has measurable impact on validation time, 234 // see https://crbug.com/910432. error(const char * msg)235 void V8_NOINLINE error(const char* msg) { errorf(pc_offset(), "%s", msg); } error(const uint8_t * pc,const char * msg)236 void V8_NOINLINE error(const uint8_t* pc, const char* msg) { 237 errorf(pc_offset(pc), "%s", msg); 238 } error(uint32_t offset,const char * msg)239 void V8_NOINLINE error(uint32_t offset, const char* msg) { 240 errorf(offset, "%s", msg); 241 } 242 errorf(const char * format,...)243 void V8_NOINLINE PRINTF_FORMAT(2, 3) errorf(const char* format, ...) { 244 va_list args; 245 va_start(args, format); 246 verrorf(pc_offset(), format, args); 247 va_end(args); 248 } 249 250 void V8_NOINLINE PRINTF_FORMAT(3, 4) errorf(uint32_t offset,const char * format,...)251 errorf(uint32_t offset, const char* format, ...) { 252 va_list args; 253 va_start(args, format); 254 verrorf(offset, format, args); 255 va_end(args); 256 } 257 258 void V8_NOINLINE PRINTF_FORMAT(3, 4) errorf(const uint8_t * pc,const char * format,...)259 errorf(const uint8_t* pc, const char* format, ...) { 260 va_list args; 261 va_start(args, format); 262 verrorf(pc_offset(pc), format, args); 263 va_end(args); 264 } 265 266 // Behavior triggered on first error, overridden in subclasses. onFirstError()267 virtual void onFirstError() {} 268 269 // Debugging helper to print a bytes range as hex bytes. traceByteRange(const byte * start,const byte * end)270 void traceByteRange(const byte* start, const byte* end) { 271 DCHECK_LE(start, end); 272 for (const byte* p = start; p < end; ++p) TRACE("%02x ", *p); 273 } 274 275 // Debugging helper to print bytes up to the end. traceOffEnd()276 void traceOffEnd() { 277 traceByteRange(pc_, end_); 278 TRACE("<end>\n"); 279 } 280 281 // Converts the given value to a {Result}, copying the error if necessary. 282 template <typename T, typename U = typename std::remove_reference<T>::type> toResult(T && val)283 Result<U> toResult(T&& val) { 284 if (failed()) { 285 TRACE("Result error: %s\n", error_.message().c_str()); 286 return Result<U>{error_}; 287 } 288 return Result<U>{std::forward<T>(val)}; 289 } 290 291 // Resets the boundaries of this decoder. 292 void Reset(const byte* start, const byte* end, uint32_t buffer_offset = 0) { 293 DCHECK_LE(start, end); 294 DCHECK_EQ(static_cast<uint32_t>(end - start), end - start); 295 start_ = start; 296 pc_ = start; 297 end_ = end; 298 buffer_offset_ = buffer_offset; 299 error_ = {}; 300 } 301 302 void Reset(Vector<const uint8_t> bytes, uint32_t buffer_offset = 0) { 303 Reset(bytes.begin(), bytes.end(), buffer_offset); 304 } 305 ok()306 bool ok() const { return error_.empty(); } failed()307 bool failed() const { return !ok(); } more()308 bool more() const { return pc_ < end_; } error()309 const WasmError& error() const { return error_; } 310 start()311 const byte* start() const { return start_; } pc()312 const byte* pc() const { return pc_; } position()313 uint32_t V8_INLINE position() const { 314 return static_cast<uint32_t>(pc_ - start_); 315 } 316 // This needs to be inlined for performance (see https://crbug.com/910432). pc_offset(const uint8_t * pc)317 uint32_t V8_INLINE pc_offset(const uint8_t* pc) const { 318 DCHECK_LE(start_, pc); 319 DCHECK_GE(kMaxUInt32 - buffer_offset_, pc - start_); 320 return static_cast<uint32_t>(pc - start_) + buffer_offset_; 321 } pc_offset()322 uint32_t pc_offset() const { return pc_offset(pc_); } buffer_offset()323 uint32_t buffer_offset() const { return buffer_offset_; } 324 // Takes an offset relative to the module start and returns an offset relative 325 // to the current buffer of the decoder. GetBufferRelativeOffset(uint32_t offset)326 uint32_t GetBufferRelativeOffset(uint32_t offset) const { 327 DCHECK_LE(buffer_offset_, offset); 328 return offset - buffer_offset_; 329 } end()330 const byte* end() const { return end_; } set_end(const byte * end)331 void set_end(const byte* end) { end_ = end; } 332 333 // Check if the byte at {offset} from the current pc equals {expected}. lookahead(int offset,byte expected)334 bool lookahead(int offset, byte expected) { 335 DCHECK_LE(pc_, end_); 336 return end_ - pc_ > offset && pc_[offset] == expected; 337 } 338 339 protected: 340 const byte* start_; 341 const byte* pc_; 342 const byte* end_; 343 // The offset of the current buffer in the module. Needed for streaming. 344 uint32_t buffer_offset_; 345 WasmError error_; 346 347 private: verrorf(uint32_t offset,const char * format,va_list args)348 void verrorf(uint32_t offset, const char* format, va_list args) { 349 // Only report the first error. 350 if (!ok()) return; 351 constexpr int kMaxErrorMsg = 256; 352 EmbeddedVector<char, kMaxErrorMsg> buffer; 353 int len = VSNPrintF(buffer, format, args); 354 CHECK_LT(0, len); 355 error_ = {offset, {buffer.begin(), static_cast<size_t>(len)}}; 356 onFirstError(); 357 } 358 359 template <typename IntType, ValidateFlag validate> read_little_endian(const byte * pc,const char * msg)360 IntType read_little_endian(const byte* pc, const char* msg) { 361 if (!validate) { 362 DCHECK(validate_size(pc, sizeof(IntType), msg)); 363 } else if (!validate_size(pc, sizeof(IntType), msg)) { 364 return IntType{0}; 365 } 366 return base::ReadLittleEndianValue<IntType>(reinterpret_cast<Address>(pc)); 367 } 368 369 template <typename IntType> consume_little_endian(const char * name)370 IntType consume_little_endian(const char* name) { 371 TRACE(" +%u %-20s: ", pc_offset(), name); 372 if (!checkAvailable(sizeof(IntType))) { 373 traceOffEnd(); 374 pc_ = end_; 375 return IntType{0}; 376 } 377 IntType val = read_little_endian<IntType, kNoValidation>(pc_, name); 378 traceByteRange(pc_, pc_ + sizeof(IntType)); 379 TRACE("= %d\n", val); 380 pc_ += sizeof(IntType); 381 return val; 382 } 383 384 template <typename IntType, ValidateFlag validate, TraceFlag trace, 385 size_t size_in_bits = 8 * sizeof(IntType)> 386 V8_INLINE IntType read_leb(const byte* pc, uint32_t* length, 387 const char* name = "varint") { 388 static_assert(size_in_bits <= 8 * sizeof(IntType), 389 "leb does not fit in type"); 390 TRACE_IF(trace, " +%u %-20s: ", pc_offset(), name); 391 // Fast path for single-byte integers. 392 if ((!validate || V8_LIKELY(pc < end_)) && !(*pc & 0x80)) { 393 TRACE_IF(trace, "%02x ", *pc); 394 *length = 1; 395 IntType result = *pc; 396 if (std::is_signed<IntType>::value) { 397 // Perform sign extension. 398 constexpr int sign_ext_shift = int{8 * sizeof(IntType)} - 7; 399 result = (result << sign_ext_shift) >> sign_ext_shift; 400 TRACE_IF(trace, "= %" PRIi64 "\n", static_cast<int64_t>(result)); 401 } else { 402 TRACE_IF(trace, "= %" PRIu64 "\n", static_cast<uint64_t>(result)); 403 } 404 return result; 405 } 406 return read_leb_slowpath<IntType, validate, trace, size_in_bits>(pc, length, 407 name); 408 } 409 410 template <typename IntType, ValidateFlag validate, TraceFlag trace, 411 size_t size_in_bits = 8 * sizeof(IntType)> read_leb_slowpath(const byte * pc,uint32_t * length,const char * name)412 V8_NOINLINE IntType read_leb_slowpath(const byte* pc, uint32_t* length, 413 const char* name) { 414 // Create an unrolled LEB decoding function per integer type. 415 return read_leb_tail<IntType, validate, trace, size_in_bits, 0>(pc, length, 416 name, 0); 417 } 418 419 template <typename IntType, ValidateFlag validate, TraceFlag trace, 420 size_t size_in_bits, int byte_index> read_leb_tail(const byte * pc,uint32_t * length,const char * name,IntType result)421 V8_INLINE IntType read_leb_tail(const byte* pc, uint32_t* length, 422 const char* name, IntType result) { 423 constexpr bool is_signed = std::is_signed<IntType>::value; 424 constexpr int kMaxLength = (size_in_bits + 6) / 7; 425 static_assert(byte_index < kMaxLength, "invalid template instantiation"); 426 constexpr int shift = byte_index * 7; 427 constexpr bool is_last_byte = byte_index == kMaxLength - 1; 428 const bool at_end = validate && pc >= end_; 429 byte b = 0; 430 if (V8_LIKELY(!at_end)) { 431 DCHECK_LT(pc, end_); 432 b = *pc; 433 TRACE_IF(trace, "%02x ", b); 434 using Unsigned = typename std::make_unsigned<IntType>::type; 435 result = result | 436 (static_cast<Unsigned>(static_cast<IntType>(b) & 0x7f) << shift); 437 } 438 if (!is_last_byte && (b & 0x80)) { 439 // Make sure that we only instantiate the template for valid byte indexes. 440 // Compilers are not smart enough to figure out statically that the 441 // following call is unreachable if is_last_byte is false. 442 constexpr int next_byte_index = byte_index + (is_last_byte ? 0 : 1); 443 return read_leb_tail<IntType, validate, trace, size_in_bits, 444 next_byte_index>(pc + 1, length, name, result); 445 } 446 *length = byte_index + (at_end ? 0 : 1); 447 if (validate && V8_UNLIKELY(at_end || (b & 0x80))) { 448 TRACE_IF(trace, at_end ? "<end> " : "<length overflow> "); 449 if (validate == kFullValidation) { 450 errorf(pc, "expected %s", name); 451 } else { 452 MarkError(); 453 } 454 result = 0; 455 *length = 0; 456 } 457 if (is_last_byte) { 458 // A signed-LEB128 must sign-extend the final byte, excluding its 459 // most-significant bit; e.g. for a 32-bit LEB128: 460 // kExtraBits = 4 (== 32 - (5-1) * 7) 461 // For unsigned values, the extra bits must be all zero. 462 // For signed values, the extra bits *plus* the most significant bit must 463 // either be 0, or all ones. 464 constexpr int kExtraBits = size_in_bits - ((kMaxLength - 1) * 7); 465 constexpr int kSignExtBits = kExtraBits - (is_signed ? 1 : 0); 466 const byte checked_bits = b & (0xFF << kSignExtBits); 467 constexpr byte kSignExtendedExtraBits = 0x7f & (0xFF << kSignExtBits); 468 const bool valid_extra_bits = 469 checked_bits == 0 || 470 (is_signed && checked_bits == kSignExtendedExtraBits); 471 if (!validate) { 472 DCHECK(valid_extra_bits); 473 } else if (V8_UNLIKELY(!valid_extra_bits)) { 474 if (validate == kFullValidation) { 475 error(pc, "extra bits in varint"); 476 } else { 477 MarkError(); 478 } 479 result = 0; 480 *length = 0; 481 } 482 } 483 constexpr int sign_ext_shift = 484 is_signed ? std::max(0, int{8 * sizeof(IntType)} - shift - 7) : 0; 485 // Perform sign extension. 486 result = (result << sign_ext_shift) >> sign_ext_shift; 487 if (trace && is_signed) { 488 TRACE("= %" PRIi64 "\n", static_cast<int64_t>(result)); 489 } else if (trace) { 490 TRACE("= %" PRIu64 "\n", static_cast<uint64_t>(result)); 491 } 492 return result; 493 } 494 }; 495 496 #undef TRACE 497 } // namespace wasm 498 } // namespace internal 499 } // namespace v8 500 501 #endif // V8_WASM_DECODER_H_ 502