// Copyright 2015 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////
#include "src/binary_parse/range_checked_byte_ptr.h"
#include <assert.h>
#include <cstddef>
#include <cstring>
namespace piex {
namespace binary_parse {
// BREAK_IF_DEBUGGING() is placed on out-of-range paths in this file (see
// extractBytes()). If BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE is defined, it
// expands to assert(false), which fails whenever assertions are enabled.
// Otherwise it expands to assert(true), which can never fail and therefore
// acts as a no-op.
#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
#define BREAK_IF_DEBUGGING() assert(false)
#else
#define BREAK_IF_DEBUGGING() assert(true)
#endif
namespace {
// PagedByteArray implementation backed by a single in-memory buffer. The
// whole buffer is exposed as exactly one page.
class MemoryPagedByteArray : public PagedByteArray {
 public:
  // Stores 'buffer' and 'len' as given; the buffer contents are not copied.
  MemoryPagedByteArray(const unsigned char *buffer, const size_t len)
      : buffer_(buffer), len_(len) {}

  // Total number of bytes in the array.
  virtual size_t length() const { return len_; }

  // The single page spans the whole buffer, so the page size is the length.
  virtual size_t pageSize() const { return len_; }

  // Hands out the complete buffer as the page, whatever index is requested.
  // '*page' is set to a default-constructed PagePtr.
  virtual void getPage(size_t /* page_index */, const unsigned char **begin,
                       const unsigned char **end, PagePtr *page) const {
    *page = PagePtr();
    *begin = buffer_;
    *end = buffer_ + len_;
  }

 private:
  const unsigned char *buffer_;
  const size_t len_;
};
// Callable whose invocations have no effect. An instance is passed as the
// deleter of the shared pointer wrapping a PagedByteArray (see the
// RangeCheckedBytePtr(PagedByteArray *) constructor), so releasing that shared
// pointer does not delete the array.
struct NullFunctor {
  void operator()() {}
  void operator()(PagedByteArray * /* unused */) const {}
};
} // namespace
// Out-of-line definition of the PagedByteArray destructor; it has nothing to
// release.
PagedByteArray::~PagedByteArray() {}
// Default constructor: creates a pointer with no backing array, an empty
// sub-array ([0, 0)), no cached page, and the error flag already set to
// RANGE_CHECKED_BYTE_ERROR. invalidPointer() returns an object built this way.
RangeCheckedBytePtr::RangeCheckedBytePtr()
: array_(),
page_data_(NULL),
current_pos_(0),
sub_array_begin_(0),
sub_array_end_(0),
page_begin_offset_(0),
current_page_len_(0),
error_flag_(RANGE_CHECKED_BYTE_ERROR) {}
// Creates a pointer over the 'len' bytes starting at 'array'. The buffer is
// wrapped in a newly allocated MemoryPagedByteArray and the sub-array initially
// covers [0, len). No page is cached until one is loaded.
// A NULL 'array' trips the assertion in debug builds and otherwise leaves the
// object with its error flag set to RANGE_CHECKED_BYTE_ERROR.
RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array,
                                         const size_t len)
    : array_(new MemoryPagedByteArray(array, len)),
      page_data_(NULL),
      current_pos_(0),
      sub_array_begin_(0),
      sub_array_end_(len),
      page_begin_offset_(0),
      current_page_len_(0),
      error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {
  const bool has_buffer = (array != NULL);
  assert(has_buffer);
  if (!has_buffer) {
    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
  }
}
// Creates a pointer over an existing PagedByteArray. The array is held through
// a shared pointer with a no-op deleter (NullFunctor), so this object never
// deletes it. The sub-array initially covers the whole array.
// A NULL 'array' is handled the same way as in the raw-buffer constructor: it
// trips the assertion in debug builds and otherwise yields an empty pointer
// whose error flag is RANGE_CHECKED_BYTE_ERROR, instead of dereferencing NULL
// in the initializer list.
RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array)
    : array_(array, NullFunctor()),
      page_data_(NULL),
      current_pos_(0),
      sub_array_begin_(0),
      sub_array_end_(array != NULL ? array->length() : 0),
      page_begin_offset_(0),
      current_page_len_(0),
      error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {
  assert(array);
  if (array == NULL) {
    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
  }
}
// Returns a default-constructed pointer, i.e. one that has no backing array
// and whose error flag is already set.
RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() {
  RangeCheckedBytePtr invalid;
  return invalid;
}
// Returns a pointer restricted to the 'length' bytes that start 'pos' bytes
// after the current position. If advancing by 'pos' reports an error, or fewer
// than 'length' bytes remain there, this object's error flag is set to
// RANGE_CHECKED_BYTE_ERROR and an invalid pointer is returned instead.
RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray(
    size_t pos, size_t length) const {
  RangeCheckedBytePtr sub_result = (*this) + pos;

  const bool fits =
      !sub_result.errorOccurred() && length <= sub_result.remainingLength();
  if (!fits) {
    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
    return invalidPointer();
  }

  // The new sub-array starts at the advanced position and spans 'length'
  // bytes.
  sub_result.sub_array_begin_ = sub_result.current_pos_;
  sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length;

  // Clip the currently cached page to the narrowed sub-array.
  sub_result.restrictPageToSubArray();
  return sub_result;
}
// Returns the current position relative to the start of the sub-array.
// current_pos_ >= sub_array_begin_ is a class invariant; should it ever be
// violated, the assertion fires in debug builds and 0 is returned otherwise.
size_t RangeCheckedBytePtr::offsetInArray() const {
  const bool invariant_holds = (current_pos_ >= sub_array_begin_);
  assert(invariant_holds);
  return invariant_holds ? current_pos_ - sub_array_begin_ : 0;
}
// Returns the bytes in [pos, pos + length), relative to the current position,
// as a std::string. The bytes are obtained through extractBytes(), so the
// result is empty whenever extractBytes() returns no bytes (for example when
// the requested range is out of bounds).
std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const {
  const std::vector<unsigned char> bytes = extractBytes(pos, length);
  // Each unsigned char is converted to char while being copied.
  return std::string(bytes.begin(), bytes.end());
}
// Copies the bytes in [pos, pos + length), relative to the current position,
// into a vector. If pos + length wraps around or exceeds remainingLength(),
// the error flag is set to RANGE_CHECKED_BYTE_ERROR_OVERFLOW and an empty
// vector is returned.
std::vector<unsigned char> RangeCheckedBytePtr::extractBytes(
    size_t pos, size_t length) const {
  std::vector<unsigned char> result;

  const size_t end = pos + length;
  const bool wrapped = end < pos;  // size_t overflow in pos + length
  if (wrapped || remainingLength() < end) {
    BREAK_IF_DEBUGGING();
    error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
    return result;
  }

  result.reserve(length);
  for (size_t index = pos; index < end; ++index) {
    result.push_back((*this)[index]);
  }
  return result;
}
// Two pointers compare equal when they refer to the same array and are at the
// same position. Comparing pointers into different arrays is treated as a
// programming error: it asserts in debug builds and yields false otherwise.
bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
  const bool different_arrays = (x.array_ != y.array_);
  assert(!different_arrays);
  return !different_arrays && (x.current_pos_ == y.current_pos_);
}
// Exact negation of operator==.
bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
  const bool equal = (x == y);
  return !equal;
}
// Loads the page of the underlying array that contains 'offset' and caches
// its data pointer, start offset and length, clipped to the sub-array.
//
// 'offset' should always lie within the sub-array (this is enforced at the
// callsite). Even if it does not, the restrictPageToSubArray() call at the end
// leaves the object in a state that maintains the class invariants.
//
// If the array violates the PagedByteArray interface (zero page size, or a
// page of unexpected length), the function asserts in debug builds and
// otherwise returns without exposing the bad page.
void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const {
  assert(offset >= sub_array_begin_ && offset < sub_array_end_);

  // Ensure that offset lies within the array.
  if (offset >= array_->length()) {
    assert(false);
    return;
  }

  // A page size of zero would make the page-index computation divide by zero.
  const size_t page_size = array_->pageSize();
  if (page_size == 0) {
    assert(false);
    return;
  }

  // Determine the index of the page to request.
  const size_t page_index = offset / page_size;

  // Get the page. The pointers start out as NULL so that an implementation
  // that fails to set them is caught by the length check below rather than
  // leading to a read of uninitialized values.
  const unsigned char *page_begin = NULL;
  const unsigned char *page_end = NULL;
  array_->getPage(page_index, &page_begin, &page_end, &page_);

  // Ensure that the page has the expected length (as specified in the
  // PagedByteArray interface). Only the last page may be shorter.
  size_t expected_page_size = page_size;
  if (page_index == (array_->length() - 1) / page_size) {
    expected_page_size = array_->length() - page_size * page_index;
  }
  if ((page_end < page_begin) ||
      (static_cast<size_t>(page_end - page_begin) != expected_page_size)) {
    assert(false);
    // getPage() has already overwritten page_, so the page that page_data_
    // pointed into may no longer be kept alive. Drop the cached page instead
    // of leaving a potentially stale pointer behind.
    page_data_ = NULL;
    current_page_len_ = 0;
    return;
  }

  // Remember information about page.
  page_data_ = page_begin;
  page_begin_offset_ = page_index * page_size;
  current_page_len_ = static_cast<size_t>(page_end - page_begin);

  // Restrict the boundaries of the page to lie within the sub-array.
  restrictPageToSubArray();
}
// Restricts the current page's boundaries so that the page is always contained
// completely within the extent of the sub-array.
//
// This function is purposely designed to work correctly in the following two
// special cases:
// a) The current page lies entirely outside the sub-array. In this case,
//    current_page_len_ is set to zero. page_data_ either remains unchanged
//    (page after the sub-array) or is moved to point one element beyond the
//    end of the page (page before the sub-array).
// b) The current page is in the state as initialized by the constructor
//    (page_data_ is NULL and current_page_len_ is zero). In this case,
//    page_data_ and current_page_len_ remain unchanged.
void RangeCheckedBytePtr::restrictPageToSubArray() const {
  // Does the beginning of the page lie before the beginning of the sub-array?
  if (page_begin_offset_ < sub_array_begin_) {
    // Compute amount by which to shorten the page, never more than its length.
    size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_;
    if (amount_to_shorten > current_page_len_) {
      amount_to_shorten = current_page_len_;
    }
    // Adjust beginning of page accordingly.
    page_begin_offset_ += amount_to_shorten;
    page_data_ += amount_to_shorten;
    current_page_len_ -= amount_to_shorten;
  }

  if (page_begin_offset_ >= sub_array_end_) {
    // The page starts at or beyond the end of the sub-array, so no part of it
    // is usable. This is handled separately because
    // sub_array_end_ - page_begin_offset_ would wrap around for a page that
    // starts after the sub-array, which previously left the length untouched.
    current_page_len_ = 0;
  } else {
    // Does the end of the page lie beyond the end of the sub-array? The
    // comparison is written without an addition so that it cannot overflow.
    const size_t max_len = sub_array_end_ - page_begin_offset_;
    if (current_page_len_ > max_len) {
      current_page_len_ = max_len;
    }
  }
}
// memcmp() analogue for range-checked pointers: compares the first 'num' bytes
// at 'x' and 'y' and returns the result of ::memcmp on them.
// If either pointer reports an error after extracting the bytes (for example
// because fewer than 'num' bytes are available), an arbitrary value (-1) is
// returned; callers must check errorOccurred() on the pointers.
// A comparison of zero bytes returns 0 without touching the (empty) buffers.
int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
           size_t num) {
  const std::vector<unsigned char> x_vec = x.extractBytes(0, num);
  const std::vector<unsigned char> y_vec = y.extractBytes(0, num);
  if (x.errorOccurred() || y.errorOccurred()) {
    // return an arbitrary value
    return -1;
  }
  if (num == 0) {
    // Both vectors are empty; indexing element 0 would be undefined.
    return 0;
  }
  return ::memcmp(&x_vec[0], &y_vec[0], num);
}
// Compares the first y.length() bytes at 'x' with the contents of 'y' and
// returns the result of ::memcmp on them. Only y.length() bytes are examined,
// so the result is 0 whenever the data at 'x' begins with 'y'.
// If 'x' reports an error after extracting the bytes, an arbitrary value (-1)
// is returned; callers must check x.errorOccurred().
// An empty 'y' compares equal (0) without touching the (empty) buffer.
int strcmp(const RangeCheckedBytePtr &x, const std::string &y) {
  const std::vector<unsigned char> x_vec = x.extractBytes(0, y.length());
  if (x.errorOccurred()) {
    // return an arbitrary value
    return -1;
  }
  if (y.empty()) {
    // x_vec is empty; indexing element 0 would be undefined.
    return 0;
  }
  return ::memcmp(&x_vec[0], y.c_str(), y.length());
}
size_t strlen(const RangeCheckedBytePtr &src) {
size_t len = 0;
RangeCheckedBytePtr str = src;
while (!str.errorOccurred() && (str[0] != '\0')) {
str++;
len++;
}
return len;
}
// Reads a 16-bit two's-complement signed integer from 'input' in the requested
// byte order. The raw bits are read with Get16u(); if '*status' is not
// RANGE_CHECKED_BYTE_SUCCESS afterwards, an arbitrary value (0) is returned.
// 'status' may be NULL, matching Get16u(), in which case no status is checked.
int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
             MemoryStatus *status) {
  const uint16 unsigned_value = Get16u(input, big_endian, status);
  if (status != NULL && *status != RANGE_CHECKED_BYTE_SUCCESS) {
    // Return an arbitrary value.
    return 0;
  }
  // Convert the two's-complement signed integer encoded in 'unsigned_value'
  // into the implementation's native representation for signed integers,
  // without relying on an implementation-defined unsigned-to-signed cast.
  // For further details, see the corresponding comment in Get32s().
  if (unsigned_value == 0x8000u) {
    // Most negative value; it has no positive counterpart to negate.
    return static_cast<int16>(-0x8000);
  } else if (unsigned_value > 0x8000u) {
    // 0x10000 - value is the magnitude of the negative number.
    return -static_cast<int16>(0x10000u - unsigned_value);
  } else {
    return static_cast<int16>(unsigned_value);
  }
}
// Reads a 16-bit unsigned integer from the first two bytes at 'input', in
// big-endian or little-endian byte order. If fewer than two bytes remain, an
// arbitrary value (0) is returned and, when 'status' is non-NULL and still
// RANGE_CHECKED_BYTE_SUCCESS, '*status' is set to RANGE_CHECKED_BYTE_ERROR.
// '*status' is not written on the success path.
uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
              MemoryStatus *status) {
  if (input.remainingLength() >= 2) {
    const uint16 first = static_cast<uint16>(input[0]);
    const uint16 second = static_cast<uint16>(input[1]);
    const uint16 high = big_endian ? first : second;
    const uint16 low = big_endian ? second : first;
    return static_cast<uint16>((high << 8) | low);
  }

  // Not enough data: record the failure without overwriting an earlier error.
  if (status != NULL && *status == RANGE_CHECKED_BYTE_SUCCESS) {
    *status = RANGE_CHECKED_BYTE_ERROR;
  }
  // Return an arbitrary value.
  return 0;
}
// Reads a 32-bit two's-complement signed integer from 'input' in the requested
// byte order. The raw bits are read with Get32u(); if '*status' is not
// RANGE_CHECKED_BYTE_SUCCESS afterwards, an arbitrary value (0) is returned.
// 'status' may be NULL, matching Get32u(), in which case no status is checked.
int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
             MemoryStatus *status) {
  const uint32 unsigned_value = Get32u(input, big_endian, status);
  if (status != NULL && *status != RANGE_CHECKED_BYTE_SUCCESS) {
    // Return an arbitrary value.
    return 0;
  }
  // Convert the two's-complement signed integer encoded in 'unsigned_value'
  // into the implementation's native representation for signed integers.
  // For all practical purposes, the same result could be obtained simply by
  // casting unsigned_value to int32; the result of that cast is
  // implementation-defined, but on all of the platforms we care about, it does
  // what we want.
  // The code below, however, is independent of the representation for signed
  // integers chosen by the implementation, as long as 'int' and 'unsigned'
  // have the required range to represent all of the required values.
  // An optimized Blaze build (x64) compiles all of the following code to a
  // no-op (as of this writing); i.e. the value that Get32u() returned in %eax
  // is left unchanged.
  if (unsigned_value == 0x80000000u) {
    // Read here on why the constant expression is written this way:
    // http://stackoverflow.com/questions/14695118
    return -0x7fffffff - 1;
  } else if (unsigned_value > 0x80000000u) {
    // The expression
    //   0xffffffffu - unsigned_value + 1
    // is a portable way of flipping the sign of a two's-complement signed
    // integer whose binary representation is stored in an unsigned integer.
    // '0xffffffffu + 1' is used in preference to simply '0' because it makes
    // it clearer that the correct result will be obtained even if an int is
    // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around
    // 'unsigned_value' to prevent the compiler from warning about an
    // integral constant overflow. ('0' would produce the correct result in
    // this case too but would rely in a more subtle way on the rules for
    // unsigned wraparound.)
    return -static_cast<int32>(0xffffffffu - unsigned_value + 1);
  } else {
    return static_cast<int32>(unsigned_value);
  }
}
// Reads a 32-bit unsigned integer from the first four bytes at 'input', in
// big-endian or little-endian byte order. If fewer than four bytes remain, an
// arbitrary value (0) is returned and, when 'status' is non-NULL and still
// RANGE_CHECKED_BYTE_SUCCESS, '*status' is set to RANGE_CHECKED_BYTE_ERROR.
// '*status' is not written on the success path.
uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
              MemoryStatus *status) {
  if (input.remainingLength() >= 4) {
    // Accumulate from the most significant byte down to the least
    // significant one; the byte order decides which input index that is.
    uint32 value = 0;
    for (size_t step = 0; step < 4; ++step) {
      const size_t index = big_endian ? step : 3 - step;
      value = (value << 8) | static_cast<uint32>(input[index]);
    }
    return value;
  }

  // Not enough data: record the failure without overwriting an earlier error.
  if (status != NULL && *status == RANGE_CHECKED_BYTE_SUCCESS) {
    *status = RANGE_CHECKED_BYTE_ERROR;
  }
  // Return an arbitrary value.
  return 0;
}
} // namespace binary_parse
} // namespace piex