// Plain text  |  403 lines  |  13.55 KB

// Copyright 2015 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////

#include "src/binary_parse/range_checked_byte_ptr.h"

#include <assert.h>
#include <cstddef>
#include <cstring>

namespace piex {
namespace binary_parse {

// BREAK_IF_DEBUGGING() is a debugging hook used in this file when a range
// check fails. When BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE is defined it expands
// to assert(false), which aborts in builds where assertions are enabled.
// Otherwise it expands to assert(true), which never fires.
#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
#define BREAK_IF_DEBUGGING() assert(false)
#else
#define BREAK_IF_DEBUGGING() assert(true)
#endif

namespace {
// A PagedByteArray backed by a caller-supplied in-memory buffer. The whole
// buffer is exposed as one page: pageSize() equals length() and getPage()
// always returns the full buffer.
// The class stores the buffer pointer without copying the bytes, and nothing
// in this class frees it.
class MemoryPagedByteArray : public PagedByteArray {
 public:
  // 'buffer' points to the first byte and 'len' is the number of bytes.
  MemoryPagedByteArray(const unsigned char *buffer, const size_t len);

  // Total number of bytes in the array.
  virtual size_t length() const;
  // Size of a page; for this class, the same value as length().
  virtual size_t pageSize() const;
  // Returns the bounds of the (single) page through 'begin' and 'end' and
  // assigns a default-constructed PagePtr to 'page'.
  virtual void getPage(size_t page_index, const unsigned char **begin,
                       const unsigned char **end, PagePtr *page) const;

 private:
  const unsigned char *buffer_;  // First byte of the wrapped buffer.
  const size_t len_;             // Number of bytes in the wrapped buffer.
};

// Remembers the buffer pointer and its length; the bytes themselves are not
// copied.
MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer,
                                           const size_t len)
    : buffer_(buffer),
      len_(len) {
}

// Number of bytes in the wrapped buffer.
size_t MemoryPagedByteArray::length() const {
  return len_;
}

// The buffer is exposed as a single page, so the page size is the buffer
// length.
size_t MemoryPagedByteArray::pageSize() const {
  return len_;
}

// Hands out the whole buffer as the page, regardless of the requested page
// index. '*page' receives a default-constructed PagePtr.
void MemoryPagedByteArray::getPage(size_t /* page_index */,
                                   const unsigned char **begin,
                                   const unsigned char **end,
                                   PagePtr *page) const {
  const unsigned char *const first_byte = buffer_;
  const unsigned char *const past_last_byte = buffer_ + len_;

  *begin = first_byte;
  *end = past_last_byte;
  *page = PagePtr();
}

// A functor that does nothing. This is used as a no-op shared pointer
// deallocator below.
struct NullFunctor {
  // Callable without arguments; does nothing.
  void operator()() {}

  // Callable with a PagedByteArray pointer; deliberately leaves the pointee
  // untouched so that it can serve as a no-op deleter.
  void operator()(PagedByteArray * /* unused */) const {}
};
}  // namespace

// Out-of-line definition of the PagedByteArray destructor; the body is
// intentionally empty.
PagedByteArray::~PagedByteArray() {}

// Default constructor: creates a pointer that refers to no array. All
// positions and lengths are zero, no page is loaded, and the error flag is
// set to RANGE_CHECKED_BYTE_ERROR from the start.
RangeCheckedBytePtr::RangeCheckedBytePtr()
    : array_(),
      page_data_(NULL),
      current_pos_(0),
      sub_array_begin_(0),
      sub_array_end_(0),
      page_begin_offset_(0),
      current_page_len_(0),
      error_flag_(RANGE_CHECKED_BYTE_ERROR) {}

// Creates a pointer to the start of the in-memory buffer 'array' of 'len'
// bytes. The buffer is wrapped in a newly allocated MemoryPagedByteArray and
// the accessible sub-array initially spans [0, len). No page is loaded yet.
// A NULL 'array' trips the assertion (when assertions are enabled) and
// otherwise leaves the object with its error flag set.
RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array,
                                         const size_t len)
    : array_(new MemoryPagedByteArray(array, len)),
      page_data_(NULL),
      current_pos_(0),
      sub_array_begin_(0),
      sub_array_end_(len),
      page_begin_offset_(0),
      current_page_len_(0),
      error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {
  assert(array);
  if (!array) {
    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
  }
}

// Creates a pointer to the start of the caller-provided PagedByteArray.
// 'array' is stored together with a no-op deleter (NullFunctor), so this
// object never deletes it. The accessible sub-array initially spans the whole
// array and no page is loaded yet.
//
// A NULL 'array' is handled the same way as in the raw-buffer constructor:
// the assertion fires when assertions are enabled; otherwise the object is
// created with an empty sub-array and its error flag set, instead of
// dereferencing the null pointer.
RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array)
    : array_(array, NullFunctor()),
      page_data_(NULL),
      current_pos_(0),
      sub_array_begin_(0),
      sub_array_end_(array != NULL ? array->length() : 0),
      page_begin_offset_(0),
      current_page_len_(0),
      error_flag_(array != NULL ? RANGE_CHECKED_BYTE_SUCCESS
                                : RANGE_CHECKED_BYTE_ERROR) {
  assert(array);
}

// Returns a default-constructed pointer, i.e. one that refers to no array and
// already has its error flag set.
RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() {
  RangeCheckedBytePtr invalid;
  return invalid;
}

// Returns a pointer whose accessible range is restricted to the 'length'
// bytes starting 'pos' bytes after the current position.
// If advancing by 'pos' fails, or fewer than 'length' bytes remain from there,
// the error flag of this object is set and an invalid pointer is returned.
RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray(
    size_t pos, size_t length) const {
  RangeCheckedBytePtr window = (*this) + pos;

  const bool fits =
      !window.errorOccurred() && length <= window.remainingLength();
  if (!fits) {
    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
    return invalidPointer();
  }

  // The new sub-array starts at the advanced position.
  window.sub_array_begin_ = window.current_pos_;
  window.sub_array_end_ = window.sub_array_begin_ + length;

  // Clip the currently loaded page to the new, narrower sub-array.
  window.restrictPageToSubArray();

  return window;
}

// Returns the current position relative to the start of the sub-array.
// The class invariant is sub_array_begin_ <= current_pos_. If it is ever
// violated, the assertion fires (when enabled) and 0 is returned rather than
// a wrapped-around difference.
size_t RangeCheckedBytePtr::offsetInArray() const {
  if (current_pos_ < sub_array_begin_) {
    assert(false);
    return 0;
  }
  return current_pos_ - sub_array_begin_;
}

// Returns the 'length' bytes starting 'pos' bytes after the current position
// as a std::string. The bytes come from extractBytes(), so a failed range
// check there yields an empty string.
std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const {
  const std::vector<unsigned char> raw = extractBytes(pos, length);
  // Each unsigned char is converted to char by the iterator-range constructor.
  return std::string(raw.begin(), raw.end());
}

// Copies the 'length' bytes starting 'pos' bytes after the current position
// into a vector.
// If pos + length wraps around, or exceeds remainingLength(), the error flag
// is set to RANGE_CHECKED_BYTE_ERROR_OVERFLOW and an empty vector is returned.
std::vector<unsigned char> RangeCheckedBytePtr::extractBytes(
    size_t pos, size_t length) const {
  std::vector<unsigned char> bytes;

  const size_t end_pos = pos + length;
  const bool wrapped_around = end_pos < pos;
  if (wrapped_around || remainingLength() < end_pos) {
    BREAK_IF_DEBUGGING();
    error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
    return bytes;
  }

  bytes.reserve(length);
  for (size_t offset = pos; offset != end_pos; ++offset) {
    bytes.push_back((*this)[offset]);
  }
  return bytes;
}

// Two pointers are equal when they are at the same position of the same
// array. Comparing pointers into different arrays is treated as a programming
// error: the assertion fires (when enabled) and the result is false.
bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
  const bool same_array = (x.array_ == y.array_);
  if (!same_array) {
    assert(false);
    return false;
  }

  const bool same_position = (x.current_pos_ == y.current_pos_);
  return same_position;
}

// Exact negation of operator==, including its handling of pointers into
// different arrays.
bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
  const bool equal = (x == y);
  return !equal;
}

// Requests from array_ the page that contains the absolute offset 'offset'
// and records it as the current page (page_data_, page_begin_offset_ and
// current_page_len_), clipped to the sub-array.
//
// Although this is a const member function, it assigns to the page-related
// members. If 'offset' lies beyond the array, or the returned page does not
// have the expected size, the function asserts and returns early without
// updating page_data_, page_begin_offset_ or current_page_len_. In the latter
// case page_ has already been overwritten by getPage().
void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const {
  // The offset should always lie within the bounds of the sub-array (this
  // condition is enforced at the callsite). However, even if the offset lies
  // outside the sub-array, the restrictPageToSubArray() call at the end
  // ensures that the object is left in a consistent state that maintains the
  // class invariants.
  assert(offset >= sub_array_begin_ && offset < sub_array_end_);

  // Ensure that offset lies within the array.
  if (offset >= array_->length()) {
    assert(false);
    return;
  }

  // Determine the index of the page to request.
  size_t page_index = offset / array_->pageSize();

  // Get the page.
  const unsigned char *page_begin;
  const unsigned char *page_end;
  array_->getPage(page_index, &page_begin, &page_end, &page_);

  // Ensure that the page has the expected length (as specified in the
  // PagedByteArray interface).
  // Every page is expected to be pageSize() bytes long, except the last one,
  // which holds whatever remains of the array.
  size_t expected_page_size = array_->pageSize();
  if (page_index == (array_->length() - 1) / array_->pageSize()) {
    expected_page_size = array_->length() - array_->pageSize() * page_index;
  }
  if ((page_end < page_begin) ||
      (static_cast<size_t>(page_end - page_begin) != expected_page_size)) {
    assert(false);
    return;
  }

  // Remember information about page.
  page_data_ = page_begin;
  page_begin_offset_ = page_index * array_->pageSize();
  current_page_len_ = static_cast<size_t>(page_end - page_begin);

  // Restrict the boundaries of the page to lie within the sub-array.
  restrictPageToSubArray();
}

// Clips the currently loaded page (page_data_, page_begin_offset_ and
// current_page_len_) so that it lies completely within the sub-array
// [sub_array_begin_, sub_array_end_).
void RangeCheckedBytePtr::restrictPageToSubArray() const {
  // This function is purposely designed to work correctly in the following
  // two special cases:
  // a) The current page lies entirely outside the sub-array. In this case,
  //    current_page_len_ will be set to zero. page_data_ may either remain
  //    unchanged or may be changed to point one element beyond the end of the
  //    page, depending on whether the current page lies before or after the
  //    sub-array.
  // b) The current page is in the state as initialized by the constructor
  //    (i.e. page_data_ is NULL and current_page_len_ is zero). In this case,
  //    page_data_ and current_page_len_ will remain unchanged.

  // Does the beginning of the page lie before the beginning of the sub-array?
  if (page_begin_offset_ < sub_array_begin_) {
    // Compute amount by which to shorten page, never more than the page
    // itself holds.
    size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_;
    if (amount_to_shorten > current_page_len_) {
      amount_to_shorten = current_page_len_;
    }

    // Adjust beginning of page accordingly.
    page_begin_offset_ += amount_to_shorten;
    page_data_ += amount_to_shorten;
    current_page_len_ -= amount_to_shorten;
  }

  // Does the end of the page lie beyond the end of the sub-array?
  if (page_begin_offset_ >= sub_array_end_) {
    // The page starts at or after the end of the sub-array, so no byte of it
    // is accessible. This case is handled separately because the subtraction
    // 'sub_array_end_ - page_begin_offset_' would wrap around for a page that
    // starts beyond the end, leaving the page length unreduced.
    current_page_len_ = 0;
  } else {
    // Here page_begin_offset_ < sub_array_end_, so the subtraction cannot
    // wrap around.
    const size_t max_len = sub_array_end_ - page_begin_offset_;
    if (current_page_len_ > max_len) {
      // Reduce length of page accordingly.
      current_page_len_ = max_len;
    }
  }
}

// Compares the first 'num' bytes at 'x' and 'y', like ::memcmp().
// Returns the ::memcmp() result for the extracted bytes. If either pointer
// has its error flag set after extraction (for example because fewer than
// 'num' bytes remain), an arbitrary value (-1) is returned instead.
int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
           size_t num) {
  const std::vector<unsigned char> x_vec = x.extractBytes(0, num);
  const std::vector<unsigned char> y_vec = y.extractBytes(0, num);

  if (x.errorOccurred() || y.errorOccurred()) {
    // return an arbitrary value
    return -1;
  }
  if (num == 0) {
    // Both vectors are empty; taking &vec[0] would be undefined behavior.
    // Comparing zero bytes yields equality.
    return 0;
  }
  return ::memcmp(&x_vec[0], &y_vec[0], num);
}

// Compares the first y.length() bytes at 'x' with the contents of 'y'.
// Only y.length() bytes are compared, so the result is 0 whenever the data at
// 'x' starts with 'y'. If 'x' has its error flag set after extraction (for
// example because fewer than y.length() bytes remain), an arbitrary value
// (-1) is returned.
int strcmp(const RangeCheckedBytePtr &x, const std::string &y) {
  const size_t num = y.length();
  const std::vector<unsigned char> x_vec = x.extractBytes(0, num);

  if (x.errorOccurred()) {
    // return an arbitrary value
    return -1;
  }
  if (num == 0) {
    // x_vec is empty; taking &x_vec[0] would be undefined behavior.
    // Comparing zero bytes yields equality.
    return 0;
  }
  return ::memcmp(&x_vec[0], y.c_str(), num);
}

size_t strlen(const RangeCheckedBytePtr &src) {
  size_t len = 0;
  RangeCheckedBytePtr str = src;
  while (!str.errorOccurred() && (str[0] != '\0')) {
    str++;
    len++;
  }
  return len;
}

// Reads a 16-bit two's-complement signed integer at 'input' with the given
// byte order.
// 'status' may be NULL, as it may for Get16u(). When it is non-NULL and does
// not hold RANGE_CHECKED_BYTE_SUCCESS after the read (including the case
// where it already held an error on entry), an arbitrary value (0) is
// returned. With a NULL status a failed read also yields 0, because Get16u()
// returns 0 on failure.
int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
             MemoryStatus *status) {
  const uint16 unsigned_value = Get16u(input, big_endian, status);
  // Guard the dereference: Get16u() accepts a NULL status, so this function
  // must not crash on one either.
  if (status && *status != RANGE_CHECKED_BYTE_SUCCESS) {
    // Return an arbitrary value.
    return 0;
  }

  // Convert the two's-complement signed integer encoded in 'unsigned_value'
  // into a signed representation in the implementation's native representation
  // for signed integers. An optimized Blaze build (x64) compiles all of the
  // following code to a no-op (as of this writing).
  // For further details, see the corresponding comment in Get32s().
  if (unsigned_value == 0x8000u) {
    return static_cast<int16>(-0x8000);
  } else if (unsigned_value > 0x8000u) {
    return -static_cast<int16>(0x10000u - unsigned_value);
  } else {
    return static_cast<int16>(unsigned_value);
  }
}

// Reads a 16-bit unsigned integer at 'input' with the given byte order.
// If fewer than two bytes remain, an arbitrary value (0) is returned. In that
// case a non-NULL 'status' that currently holds RANGE_CHECKED_BYTE_SUCCESS is
// changed to RANGE_CHECKED_BYTE_ERROR; an error already stored in 'status' is
// left as it is.
uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
              MemoryStatus *status) {
  const bool enough_bytes = !(input.remainingLength() < 2);
  if (!enough_bytes) {
    if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
      *status = RANGE_CHECKED_BYTE_ERROR;
    }
    // Return an arbitrary value.
    return 0;
  }

  // The most significant byte comes first in big-endian data and second in
  // little-endian data.
  const uint16 high_byte = static_cast<uint16>(input[big_endian ? 0 : 1]);
  const uint16 low_byte = static_cast<uint16>(input[big_endian ? 1 : 0]);
  return (high_byte << 8) | low_byte;
}

// Reads a 32-bit two's-complement signed integer at 'input' with the given
// byte order.
// 'status' may be NULL, as it may for Get32u(). When it is non-NULL and does
// not hold RANGE_CHECKED_BYTE_SUCCESS after the read (including the case
// where it already held an error on entry), an arbitrary value (0) is
// returned. With a NULL status a failed read also yields 0, because Get32u()
// returns 0 on failure.
int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
             MemoryStatus *status) {
  const uint32 unsigned_value = Get32u(input, big_endian, status);
  // Guard the dereference: Get32u() accepts a NULL status, so this function
  // must not crash on one either.
  if (status && *status != RANGE_CHECKED_BYTE_SUCCESS) {
    // Return an arbitrary value.
    return 0;
  }

  // Convert the two's-complement signed integer encoded in 'unsigned_value'
  // into a signed representation in the implementation's native representation
  // for signed integers.
  // For all practical purposes, the same result could be obtained simply by
  // casting unsigned_value to int32; the result of this is
  // implementation-defined, but on all of the platforms we care about, it does
  // what we want.
  // The code below, however, arguably has the aesthetic advantage of being
  // independent of the representation for signed integers chosen by the
  // implementation, as long as 'int' and 'unsigned' have the required range to
  // represent all of the required values.
  // An optimized Blaze build (x64) compiles all of the following code to a
  // no-op (as of this writing); i.e. the value that Get32u() returned in %eax
  // is left unchanged.
  if (unsigned_value == 0x80000000u) {
    // Read here on why the constant expression is written this way:
    // http://stackoverflow.com/questions/14695118
    return -0x7fffffff - 1;
  } else if (unsigned_value > 0x80000000u) {
    // The expression
    //   0xffffffffu - unsigned_value + 1
    // is a portable way of flipping the sign of a twos-complement signed
    // integer whose binary representation is stored in an unsigned integer.
    // '0xffffffffu + 1' is used in preference to simply '0' because it makes
    // it clearer that the correct result will be obtained even if an int is
    // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around
    // 'unsigned_value' to prevent the compiler from warning about an
    // integral constant overflow. ('0' would produce the correct result in
    // this case too but would rely in a more subtle way on the rules for
    // unsigned wraparound.)
    return -static_cast<int32>(0xffffffffu - unsigned_value + 1);
  } else {
    return static_cast<int32>(unsigned_value);
  }
}

// Reads a 32-bit unsigned integer at 'input' with the given byte order.
// If fewer than four bytes remain, an arbitrary value (0) is returned. In
// that case a non-NULL 'status' that currently holds
// RANGE_CHECKED_BYTE_SUCCESS is changed to RANGE_CHECKED_BYTE_ERROR; an error
// already stored in 'status' is left as it is.
uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
              MemoryStatus *status) {
  const bool enough_bytes = !(input.remainingLength() < 4);
  if (!enough_bytes) {
    if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
      *status = RANGE_CHECKED_BYTE_ERROR;
    }
    // Return an arbitrary value.
    return 0;
  }

  // Assemble the value from the most significant byte down. That byte is at
  // index 0 in big-endian data and at index 3 in little-endian data.
  uint32 value = 0;
  for (size_t i = 0; i < 4; ++i) {
    const size_t byte_index = big_endian ? i : 3 - i;
    value = (value << 8) | static_cast<uint32>(input[byte_index]);
  }
  return value;
}

}  // namespace binary_parse
}  // namespace piex