// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_WASM_DECODER_H_
#define V8_WASM_DECODER_H_
#include "src/base/compiler-specific.h"
#include "src/base/smart-pointers.h"
#include "src/flags.h"
#include "src/signature.h"
#include "src/utils.h"
#include "src/wasm/wasm-result.h"
#include "src/zone-containers.h"
namespace v8 {
namespace internal {
namespace wasm {
#if DEBUG
// Tracing helper: forwards its arguments to PrintF when
// FLAG_trace_wasm_decoder is set. The do/while(false) wrapper makes the
// expansion behave like a single statement.
#define TRACE(...)                   \
  do {                               \
    if (FLAG_trace_wasm_decoder) {   \
      PrintF(__VA_ARGS__);           \
    }                                \
  } while (false)
#else
// When DEBUG is not set, tracing expands to nothing.
#define TRACE(...)
#endif
// UNALIGNED_ACCESS_OK is 0 on MIPS, MIPS64 and ARM targets and 1 everywhere
// else. The read_u* helpers use it to choose between a direct load and
// byte-wise assembly of the value.
#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_ARM
#define UNALIGNED_ACCESS_OK 0
#else
#define UNALIGNED_ACCESS_OK 1
#endif
// A helper utility to decode bytes, integers, fields, varints, etc, from
// a buffer of bytes.
class Decoder {
public:
// Creates a decoder positioned at {start} whose readable range stops at
// {end}. No error is recorded initially.
Decoder(const byte* start, const byte* end)
    : start_(start), pc_(start), limit_(end), end_(end),
      error_pc_(nullptr), error_pt_(nullptr) {}
// Virtual so that subclasses are destroyed correctly through a Decoder*.
virtual ~Decoder() = default;
// Checks that the {length} bytes starting at {base + offset} lie before
// {limit_}. On failure, records {msg} as an error (with {base} as the error
// pc and {base + offset} as the error pt) and returns false.
inline bool check(const byte* base, unsigned offset, unsigned length,
                  const char* msg) {
  DCHECK_GE(base, start_);
  // Compare byte counts instead of pointers: computing
  // {base + offset + length} for a large, untrusted {offset} could overflow
  // the pointer, which is undefined behavior and could defeat the bounds
  // check. The sum of two 32-bit values cannot overflow in 64 bits.
  const uint64_t needed = static_cast<uint64_t>(offset) + length;
  const bool in_bounds =
      base <= limit_ && needed <= static_cast<uint64_t>(limit_ - base);
  if (!in_bounds) {
    error(base, base + offset, "%s", msg);
    return false;
  }
  return true;
}
// Bounds-checked read of the byte at {base + offset}. If the byte is out of
// bounds, {msg} is reported as an error and 0 is returned.
inline uint8_t checked_read_u8(const byte* base, unsigned offset,
                               const char* msg = "expected 1 byte") {
  if (!check(base, offset, 1, msg)) return 0;
  return base[offset];
}
// Reads 16-bit word, reporting an error if out of bounds.
inline uint16_t checked_read_u16(const byte* base, unsigned offset,
const char* msg = "expected 2 bytes") {
return check(base, offset, 2, msg) ? read_u16(base + offset) : 0;
}
// Reads 32-bit word, reporting an error if out of bounds.
inline uint32_t checked_read_u32(const byte* base, unsigned offset,
const char* msg = "expected 4 bytes") {
return check(base, offset, 4, msg) ? read_u32(base + offset) : 0;
}
// Reads 64-bit word, reporting an error if out of bounds.
inline uint64_t checked_read_u64(const byte* base, unsigned offset,
const char* msg = "expected 8 bytes") {
return check(base, offset, 8, msg) ? read_u64(base + offset) : 0;
}
// Bounds-checked read of an unsigned LEB128-encoded 32-bit integer at
// {base + offset}. The number of bytes read is stored in {*length}.
uint32_t checked_read_u32v(const byte* base, unsigned offset,
                           unsigned* length,
                           const char* msg = "expected LEB32") {
  const uint32_t value =
      checked_read_leb<uint32_t, false>(base, offset, length, msg);
  return value;
}
// Bounds-checked read of a signed LEB128-encoded 32-bit integer at
// {base + offset}. The number of bytes read is stored in {*length}; when that
// is 0, the result is 0.
int32_t checked_read_i32v(const byte* base, unsigned offset, unsigned* length,
                          const char* msg = "expected SLEB32") {
  const uint32_t raw =
      checked_read_leb<uint32_t, true>(base, offset, length, msg);
  const unsigned num_bytes = *length;
  if (num_bytes == 0) return 0;
  // A 5-byte encoding already supplies all 32 bits.
  if (num_bytes == 5) return bit_cast<int32_t>(raw);
  // Shorter encodings carry 7 * num_bytes payload bits: move the topmost
  // payload bit into the sign position, then shift back arithmetically to
  // sign-extend.
  const int shift = 32 - 7 * num_bytes;
  return bit_cast<int32_t>(raw << shift) >> shift;
}
// Bounds-checked read of an unsigned LEB128-encoded 64-bit integer at
// {base + offset}. The number of bytes read is stored in {*length}.
uint64_t checked_read_u64v(const byte* base, unsigned offset,
                           unsigned* length,
                           const char* msg = "expected LEB64") {
  const uint64_t value =
      checked_read_leb<uint64_t, false>(base, offset, length, msg);
  return value;
}
// Bounds-checked read of a signed LEB128-encoded 64-bit integer at
// {base + offset}. The number of bytes read is stored in {*length}; when that
// is 0, the result is 0.
int64_t checked_read_i64v(const byte* base, unsigned offset, unsigned* length,
                          const char* msg = "expected SLEB64") {
  const uint64_t raw =
      checked_read_leb<uint64_t, true>(base, offset, length, msg);
  const unsigned num_bytes = *length;
  if (num_bytes == 0) return 0;
  // A 10-byte encoding already supplies all 64 bits.
  if (num_bytes == 10) return bit_cast<int64_t>(raw);
  // Shorter encodings carry 7 * num_bytes payload bits: move the topmost
  // payload bit into the sign position, then shift back arithmetically to
  // sign-extend.
  const int shift = 64 - 7 * num_bytes;
  return bit_cast<int64_t>(raw << shift) >> shift;
}
// Reads the little-endian 16-bit value at {ptr} without reporting errors;
// the range is only verified by a DCHECK against {start_} and {end_}.
inline uint16_t read_u16(const byte* ptr) {
  DCHECK(ptr >= start_ && (ptr + 2) <= end_);
#if V8_TARGET_LITTLE_ENDIAN && UNALIGNED_ACCESS_OK
  const uint16_t* word = reinterpret_cast<const uint16_t*>(ptr);
  return *word;
#else
  // Assemble the value byte by byte, least significant byte first.
  const uint16_t lo = ptr[0];
  const uint16_t hi = ptr[1];
  return lo | (hi << 8);
#endif
}
// Reads the little-endian 32-bit value at {ptr} without reporting errors;
// the range is only verified by a DCHECK against {start_} and {end_}.
inline uint32_t read_u32(const byte* ptr) {
  DCHECK(ptr >= start_ && (ptr + 4) <= end_);
#if V8_TARGET_LITTLE_ENDIAN && UNALIGNED_ACCESS_OK
  const uint32_t* word = reinterpret_cast<const uint32_t*>(ptr);
  return *word;
#else
  // Fold the bytes in from most to least significant, so that ptr[0] ends
  // up in the low 8 bits.
  uint32_t value = 0;
  for (int i = 3; i >= 0; --i) {
    value = (value << 8) | ptr[i];
  }
  return value;
#endif
}
// Reads the little-endian 64-bit value at {ptr} without reporting errors;
// the range is only verified by a DCHECK against {start_} and {end_}.
inline uint64_t read_u64(const byte* ptr) {
  DCHECK(ptr >= start_ && (ptr + 8) <= end_);
#if V8_TARGET_LITTLE_ENDIAN && UNALIGNED_ACCESS_OK
  const uint64_t* word = reinterpret_cast<const uint64_t*>(ptr);
  return *word;
#else
  // Fold the bytes in from most to least significant, so that ptr[0] ends
  // up in the low 8 bits.
  uint64_t value = 0;
  for (int i = 7; i >= 0; --i) {
    value = (value << 8) | ptr[i];
  }
  return value;
#endif
}
// Reads one byte at {pc_} and advances {pc_} past it. If no byte is
// available, an error is reported by checkAvailable() and the result of
// traceOffEnd() is returned.
uint8_t consume_u8(const char* name = nullptr) {
  TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_),
        name ? name : "uint8_t");
  if (!checkAvailable(1)) return traceOffEnd<uint8_t>();
  byte val = *pc_;
  pc_++;
  TRACE("%02x = %d\n", val, val);
  return val;
}
// Reads a little-endian 16-bit value at {pc_} and advances {pc_} past it.
// If fewer than 2 bytes are available, an error is reported by
// checkAvailable() and the result of traceOffEnd() is returned.
uint16_t consume_u16(const char* name = nullptr) {
  TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_),
        name ? name : "uint16_t");
  if (!checkAvailable(2)) return traceOffEnd<uint16_t>();
  const uint16_t val = read_u16(pc_);
  TRACE("%02x %02x = %d\n", pc_[0], pc_[1], val);
  pc_ += 2;
  return val;
}
// Reads a little-endian 32-bit value at {pc_} and advances {pc_} past it.
// If fewer than 4 bytes are available, an error is reported by
// checkAvailable() and the result of traceOffEnd() is returned.
uint32_t consume_u32(const char* name = nullptr) {
  TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_),
        name ? name : "uint32_t");
  if (!checkAvailable(4)) return traceOffEnd<uint32_t>();
  const uint32_t val = read_u32(pc_);
  TRACE("%02x %02x %02x %02x = %u\n", pc_[0], pc_[1], pc_[2], pc_[3], val);
  pc_ += 4;
  return val;
}
// Reads an unsigned LEB128-encoded 32-bit integer at {pc_} and advances
// {pc_} past the bytes read (at most 5, and never beyond {limit_}).
// Reports "varint too large" if the last byte read still has its
// continuation bit set. The bits accumulated so far are returned even when
// an error was reported. If not even one byte is available, an error is
// reported by checkAvailable() and the result of traceOffEnd() is returned.
uint32_t consume_u32v(const char* name = nullptr) {
  TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_),
        name ? name : "varint");
  if (!checkAvailable(1)) return traceOffEnd<uint32_t>();
  const byte* pos = pc_;
  // Clamp the end of the varint to {limit_} without first forming a pointer
  // beyond the end of the buffer.
  const byte* end = (limit_ - pc_ > 5) ? pc_ + 5 : limit_;
  uint32_t result = 0;
  int shift = 0;
  byte b = 0;
  while (pc_ < end) {
    b = *pc_++;
    TRACE("%02x ", b);
    // Widen to uint32_t before shifting: {b & 0x7F} is promoted to int, and
    // shifting it left by 28 (fifth byte) would overflow a signed int, which
    // is undefined behavior.
    result |= static_cast<uint32_t>(b & 0x7F) << shift;
    if ((b & 0x80) == 0) break;
    shift += 7;
  }
  int length = static_cast<int>(pc_ - pos);
  if (pc_ == end && (b & 0x80)) {
    error(pc_ - 1, "varint too large");
  } else if (length == 0) {
    // Defensive only: checkAvailable(1) succeeded above, so the loop has
    // consumed at least one byte.
    error(pc_, "varint of length 0");
  } else {
    TRACE("= %u\n", result);
  }
  return result;
}
// Skips {size} bytes by advancing {pc_}. If that many bytes are not
// available, checkAvailable() reports an error and {pc_} is moved to
// {limit_} instead.
void consume_bytes(int size) {
  const bool in_bounds = checkAvailable(size);
  pc_ = in_bounds ? pc_ + size : limit_;
}
// Checks that at least {size} bytes exist between {pc_} and {limit_}.
// Reports an error and returns false if {size} is negative, if advancing
// {pc_} by {size} would overflow, or if the range is out of bounds.
bool checkAvailable(int size) {
  // Test the sign of {size} first: subtracting a negative {size} from the
  // maximum intptr_t value would itself be a signed overflow (undefined
  // behavior). Short-circuit evaluation guarantees the subtraction is only
  // performed for non-negative sizes.
  if (size < 0 || reinterpret_cast<intptr_t>(pc_) >
                      std::numeric_limits<intptr_t>::max() - size) {
    error(pc_, nullptr, "reading %d bytes would underflow/overflow", size);
    return false;
  }
  if (pc_ < start_ || limit_ < (pc_ + size)) {
    error(pc_, nullptr, "expected %d bytes, fell off end", size);
    return false;
  }
  return true;
}
void error(const char* msg) { error(pc_, nullptr, "%s", msg); }
void error(const byte* pc, const char* msg) { error(pc, nullptr, "%s", msg); }
// Records an error: formats {format} and the variadic arguments into a
// freshly allocated message, stores it together with {pc} and {pt}, and then
// invokes onFirstError(). Does nothing unless ok() is true.
void PRINTF_FORMAT(4, 5)
error(const byte* pc, const byte* pt, const char* format, ...) {
  if (!ok()) return;
#if DEBUG
  if (FLAG_wasm_break_on_decoder_error) base::OS::DebugBreak();
#endif
  const int kMaxErrorMsg = 256;
  char* message = new char[kMaxErrorMsg];
  va_list args;
  va_start(args, format);
  base::OS::VSNPrintF(message, kMaxErrorMsg - 1, format, args);
  va_end(args);
  // {error_msg_} takes ownership of the buffer.
  error_msg_.Reset(message);
  error_pc_ = pc;
  error_pt_ = pt;
  onFirstError();
}
// Hook called by error() right after an error has been recorded. The base
// implementation does nothing; subclasses may override it.
virtual void onFirstError() {
}
// Debugging helper to print bytes up to the end.
template <typename T>
T traceOffEnd() {
T t = 0;
for (const byte* ptr = pc_; ptr < limit_; ptr++) {
TRACE("%02x ", *ptr);
}
TRACE("<end>\n");
pc_ = limit_;
return t;
}
// Wraps {val} in a Result<T>. If this decoder has recorded an error, the
// result carries kError together with the buffer start, the error pc/pt and
// the error message; otherwise it carries kSuccess. {val} is moved into the
// result in both cases.
template <typename T>
Result<T> toResult(T val) {
  Result<T> result;
  // Key on failed() (a recorded message) rather than on {error_pc_}: an
  // error reported with a null pc would otherwise be returned as success.
  if (failed()) {
    TRACE("Result error: %s\n", error_msg_.get());
    result.error_code = kError;
    result.start = start_;
    result.error_pc = error_pc_;
    result.error_pt = error_pt_;
    // Transfer ownership of the error message to the result.
    result.error_msg.Reset(error_msg_.Detach());
  } else {
    result.error_code = kSuccess;
  }
  result.val = std::move(val);
  return result;
}
// Re-targets this decoder at the bytes between {start} and {end}: the read
// position returns to {start} and any recorded error is cleared.
void Reset(const byte* start, const byte* end) {
  // Drop the previous error state.
  error_msg_.Reset(nullptr);
  error_pt_ = nullptr;
  error_pc_ = nullptr;
  // Install the new buffer boundaries.
  start_ = start;
  pc_ = start;
  end_ = end;
  limit_ = end;
}
bool ok() const { return error_pc_ == nullptr; }
// Returns true once an error message has been recorded.
bool failed() const {
  const bool has_message = !error_msg_.is_empty();
  return has_message;
}
// Returns true while {pc_} has not yet reached {limit_}.
bool more() const {
  return pc_ < limit_;
}
const byte* start() { return start_; }
const byte* pc() { return pc_; }
uint32_t pc_offset() { return static_cast<uint32_t>(pc_ - start_); }
protected:
// First byte of the buffer; the base for pc_offset() and trace offsets.
const byte* start_;
// Current read position; advanced by the consume_* methods.
const byte* pc_;
// Upper bound used by the bounds checks (check(), checkAvailable()).
const byte* limit_;
// Upper bound used by the DCHECKs in read_u16/read_u32/read_u64. The
// constructor and Reset() set it to the same value as {limit_}.
const byte* end_;
// The {pc} argument of the recorded error; nullptr after construction/Reset.
const byte* error_pc_;
// The {pt} argument of the recorded error; nullptr after construction/Reset.
// NOTE(review): presumably a secondary location related to the error --
// confirm against the consumers of Result.
const byte* error_pt_;
// Formatted message of the recorded error; empty while none was recorded.
// Ownership moves to the Result in toResult().
base::SmartArrayPointer<char> error_msg_;
private:
// Shared implementation of the checked_read_*v methods: decodes a LEB128
// value of type {IntType} starting at {base + offset}.
// - Reads at most kMaxLength bytes and never reads at or beyond {limit_}.
// - Stores the number of bytes read in {*length}; this is 0 only when not
//   even the first byte is in bounds.
// - Returns the accumulated 7-bit payload groups. No sign extension happens
//   here; {is_signed} only influences the validation of the final byte.
// - Returns 0 after reporting an error; {*length} keeps the number of bytes
//   read in that case.
template <typename IntType, bool is_signed>
IntType checked_read_leb(const byte* base, unsigned offset, unsigned* length,
const char* msg) {
if (!check(base, offset, 1, msg)) {
*length = 0;
return 0;
}
// Number of 7-bit groups needed to cover IntType, rounded up: 5 for 32-bit
// and 10 for 64-bit types.
const int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
const byte* ptr = base + offset;
const byte* end = ptr + kMaxLength;
// Never read past the decoder's limit.
if (end > limit_) end = limit_;
int shift = 0;
byte b = 0;
IntType result = 0;
while (ptr < end) {
b = *ptr++;
// Append the low 7 bits of this byte above the bits collected so far.
result = result | (static_cast<IntType>(b & 0x7F) << shift);
// A clear top bit marks the last byte of the encoding.
if ((b & 0x80) == 0) break;
shift += 7;
}
DCHECK_LE(ptr - (base + offset), kMaxLength);
*length = static_cast<unsigned>(ptr - (base + offset));
// Validation is only needed if the loop consumed everything up to {end},
// i.e. the maximum length or the limit was reached.
if (ptr == end) {
// Check there are no bits set beyond the bitwidth of {IntType}.
// kExtraBits counts the bits of the final byte that do not fit into
// IntType, including the continuation bit: 4 for 32-bit, 7 for 64-bit.
const int kExtraBits = (1 + kMaxLength * 7) - (sizeof(IntType) * 8);
const byte kExtraBitsMask =
static_cast<byte>((0xFF << (8 - kExtraBits)) & 0xFF);
// The value the masked bits of the final byte are required to have.
int extra_bits_value;
if (is_signed) {
// A signed-LEB128 must sign-extend the final byte, excluding its
// most-significant bit. e.g. for a 32-bit LEB128:
// kExtraBits = 4
// kExtraBitsMask = 0xf0
// If b is 0x0f, the value is negative, so extra_bits_value is 0x70.
// If b is 0x03, the value is positive, so extra_bits_value is 0x00.
extra_bits_value = (static_cast<int8_t>(b << kExtraBits) >> 8) &
kExtraBitsMask & ~0x80;
} else {
extra_bits_value = 0;
}
// The mask includes the continuation bit, so a maximum-length encoding
// whose final byte still has that bit set is also rejected here.
if (*length == kMaxLength && (b & kExtraBitsMask) != extra_bits_value) {
error(base, ptr, "extra bits in varint");
return 0;
}
// The last byte read still announces a continuation, but no further byte
// may be read: the encoding is unterminated.
if ((b & 0x80) != 0) {
error(base, ptr, "%s", msg);
return 0;
}
}
return result;
}
};
#undef TRACE
} // namespace wasm
} // namespace internal
} // namespace v8
#endif // V8_WASM_DECODER_H_