普通文本  |  391行  |  13.25 KB

#include "image_io/base/data_scanner.h"

namespace photos_editing_formats {
namespace image_io {

namespace {

const char kWhitespaceChars[] = " \t\n\r";

/// This function is like strspn but does not assume a null-terminated string.
size_t memspn(const char* s, size_t slen, const char* accept) {
  const char* p = s;
  const char* spanp;
  char c, sc;

cont:
  c = *p++;
  if (slen-- == 0) return p - 1 - s;
  for (spanp = accept; (sc = *spanp++) != '\0';)
    if (sc == c) goto cont;
  return p - 1 - s;
}

/// @return Whether value is in the range [lo:hi].
bool InRange(char value, char lo, char hi) {
  return value >= lo && value <= hi;
}

/// @return Whether the value is the first character of a kName type scanner.
bool IsFirstNameChar(char value) {
  return InRange(value, 'A', 'Z') || InRange(value, 'a', 'z') || value == '_' ||
         value == ':';
}

/// Scans the characters in the s string, where the characters can be any legal
/// character in the name.
/// @return The number of name characters scanned.
size_t ScanOptionalNameChars(const char* s, size_t slen) {
  const char* kOptionalChars =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_:";
  return memspn(s, slen, kOptionalChars);
}

/// Scans the whitespace characters in the s string.
/// @return The number of whitepace characters scanned.
size_t ScanWhitespaceChars(const char* s, size_t slen) {
  return memspn(s, slen, kWhitespaceChars);
}

}  // namespace

std::string DataScanner::GetWhitespaceChars() { return kWhitespaceChars; }

DataScanner DataScanner::CreateLiteralScanner(const std::string& literal) {
  return DataScanner(DataScanner::kLiteral, literal);
}

DataScanner DataScanner::CreateNameScanner() {
  return DataScanner(DataScanner::kName);
}

DataScanner DataScanner::CreateQuotedStringScanner() {
  return DataScanner(DataScanner::kQuotedString);
}

DataScanner DataScanner::CreateSentinelScanner(const std::string& sentinels) {
  return DataScanner(DataScanner::kSentinel, sentinels);
}

DataScanner DataScanner::CreateThroughLiteralScanner(
    const std::string& literal) {
  return DataScanner(DataScanner::kThroughLiteral, literal);
}

DataScanner DataScanner::CreateWhitespaceScanner() {
  return DataScanner(DataScanner::kWhitespace);
}

DataScanner DataScanner::CreateOptionalWhitespaceScanner() {
  return DataScanner(DataScanner::kOptionalWhitespace);
}

size_t DataScanner::ExtendTokenLength(size_t delta_length) {
  token_range_ =
      DataRange(token_range_.GetBegin(), token_range_.GetEnd() + delta_length);
  return token_range_.GetLength();
}

void DataScanner::SetInternalError(const DataContext& context,
                                   const std::string& error_description,
                                   DataMatchResult* result) {
  result->SetType(DataMatchResult::kError);
  result->SetMessage(
      Message::kInternalError,
      context.GetErrorText({}, {GetDescription()}, error_description, ""));
}

void DataScanner::SetSyntaxError(const DataContext& context,
                                 const std::string& error_description,
                                 DataMatchResult* result) {
  result->SetType(DataMatchResult::kError);
  result->SetMessage(Message::kSyntaxError,
                     context.GetErrorText(error_description, GetDescription()));
}

DataMatchResult DataScanner::ScanLiteral(const char* cbytes,
                                         size_t bytes_available,
                                         const DataContext& context) {
  DataMatchResult result;
  size_t token_length = token_range_.GetLength();
  if (token_length >= literal_or_sentinels_.length()) {
    SetInternalError(context, "Literal already scanned", &result);
    return result;
  }
  size_t bytes_still_needed = literal_or_sentinels_.length() - token_length;
  size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
  if (strncmp(&literal_or_sentinels_[token_length], cbytes, bytes_to_compare) ==
      0) {
    token_length = ExtendTokenLength(bytes_to_compare);
    result.SetBytesConsumed(bytes_to_compare);
    result.SetType(token_length == literal_or_sentinels_.length()
                       ? DataMatchResult::kFull
                       : DataMatchResult::kPartialOutOfData);
  } else {
    SetSyntaxError(context, "Expected literal", &result);
  }
  return result;
}

DataMatchResult DataScanner::ScanName(const char* cbytes,
                                      size_t bytes_available,
                                      const DataContext& context) {
  DataMatchResult result;
  size_t token_length = token_range_.GetLength();
  if (token_length == 0) {
    if (!IsFirstNameChar(*cbytes)) {
      SetSyntaxError(context, "Expected first character of a name", &result);
      return result;
    }
    token_length = ExtendTokenLength(1);
    result.SetBytesConsumed(1);
    bytes_available -= 1;
    cbytes += 1;
  }
  size_t optional_bytes_consumed =
      ScanOptionalNameChars(cbytes, bytes_available);
  token_length = ExtendTokenLength(optional_bytes_consumed);
  result.IncrementBytesConsumed(optional_bytes_consumed);
  if (result.GetBytesConsumed() == 0 && token_length > 0) {
    result.SetType(DataMatchResult::kFull);
  } else if (optional_bytes_consumed < bytes_available) {
    result.SetType(DataMatchResult::kFull);
  } else {
    result.SetType(DataMatchResult::kPartialOutOfData);
  }
  return result;
}

DataMatchResult DataScanner::ScanQuotedString(const char* cbytes,
                                              size_t bytes_available,
                                              const DataContext& context) {
  const size_t kStart = 0;
  const size_t kDone = '.';
  const size_t kSquote = '\'';
  const size_t kDquote = '"';
  DataMatchResult result;
  size_t token_length = token_range_.GetLength();
  if ((data_ == kStart && token_length != 0) ||
      (data_ != kStart && data_ != kSquote && data_ != kDquote)) {
    SetInternalError(context, "Inconsistent state", &result);
    return result;
  }
  if (data_ == kStart) {
    if (*cbytes != kSquote && *cbytes != kDquote) {
      SetSyntaxError(context, "Expected start of a quoted string", &result);
      return result;
    }
    data_ = *cbytes++;
    bytes_available--;
    result.SetBytesConsumed(1);
    token_length = ExtendTokenLength(1);
  }
  const char* ebytes = reinterpret_cast<const char*>(
      memchr(cbytes, static_cast<int>(data_), bytes_available));
  size_t bytes_scanned = ebytes ? ebytes - cbytes : bytes_available;
  result.IncrementBytesConsumed(bytes_scanned);
  token_length = ExtendTokenLength(bytes_scanned);
  if (bytes_scanned == bytes_available) {
    result.SetType(DataMatchResult::kPartialOutOfData);
  } else {
    result.SetType(DataMatchResult::kFull);
    result.IncrementBytesConsumed(1);
    ExtendTokenLength(1);
    data_ = kDone;
  }
  return result;
}

DataMatchResult DataScanner::ScanSentinel(const char* cbytes,
                                          size_t bytes_available,
                                          const DataContext& context) {
  DataMatchResult result;
  if (data_ != 0) {
    SetInternalError(context, "Sentinel already scanned", &result);
    return result;
  }
  char cbyte = *cbytes;
  for (size_t index = 0; index < literal_or_sentinels_.size(); ++index) {
    char sentinel = literal_or_sentinels_[index];
    if ((sentinel == '~' && IsFirstNameChar(cbyte)) || cbyte == sentinel) {
      ExtendTokenLength(1);
      result.SetBytesConsumed(1).SetType(DataMatchResult::kFull);
      data_ = sentinel;
      break;
    }
  }
  if (result.GetBytesConsumed() == 0) {
    SetSyntaxError(context, "Expected sentinal character", &result);
  }
  return result;
}

DataMatchResult DataScanner::ScanThroughLiteral(const char* cbytes,
                                                size_t bytes_available,
                                                const DataContext& context) {
  DataMatchResult result;
  size_t& scanned_literal_length = data_;
  if (scanned_literal_length >= literal_or_sentinels_.length()) {
    SetInternalError(context, "Literal already scanned", &result);
    return result;
  }
  while (bytes_available > 0) {
    if (scanned_literal_length == 0) {
      // Literal scan not in progress. Find the first char of the literal.
      auto* matched_byte = reinterpret_cast<const char*>(
          memchr(cbytes, literal_or_sentinels_[0], bytes_available));
      if (matched_byte == nullptr) {
        // first char not found and chars exhausted.
        ExtendTokenLength(bytes_available);
        result.IncrementBytesConsumed(bytes_available);
        result.SetType(DataMatchResult::kPartialOutOfData);
        break;
      } else {
        // found the first char of the literal.
        size_t bytes_scanned = (matched_byte - cbytes) + 1;
        result.IncrementBytesConsumed(bytes_scanned);
        bytes_available -= bytes_scanned;
        cbytes += bytes_scanned;
        ExtendTokenLength(bytes_scanned);
        scanned_literal_length = 1;
      }
    }
    // check if the rest of the literal is there.
    size_t bytes_still_needed =
        literal_or_sentinels_.length() - scanned_literal_length;
    size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
    if (strncmp(&literal_or_sentinels_[scanned_literal_length], cbytes,
                bytes_to_compare) == 0) {
      // Yes, the whole literal is there or chars are exhausted.
      ExtendTokenLength(bytes_to_compare);
      scanned_literal_length += bytes_to_compare;
      result.IncrementBytesConsumed(bytes_to_compare);
      result.SetType(scanned_literal_length == literal_or_sentinels_.length()
                         ? DataMatchResult::kFull
                         : DataMatchResult::kPartialOutOfData);
      break;
    }
    // false alarm, the firsts char of the literal were found, but not the
    // whole enchilada. Keep searching at one past the first char of the match.
    scanned_literal_length = 0;
  }
  return result;
}

DataMatchResult DataScanner::ScanWhitespace(const char* cbytes,
                                            size_t bytes_available,
                                            const DataContext& context) {
  DataMatchResult result;
  size_t token_length = token_range_.GetLength();
  result.SetBytesConsumed(ScanWhitespaceChars(cbytes, bytes_available));
  token_length = ExtendTokenLength(result.GetBytesConsumed());
  if (result.GetBytesConsumed() == 0) {
    if (token_length == 0 && type_ == kWhitespace) {
      SetSyntaxError(context, "Expected whitespace", &result);
    } else {
      result.SetType(DataMatchResult::kFull);
    }
  } else {
    result.SetType((result.GetBytesConsumed() < bytes_available)
                       ? DataMatchResult::kFull
                       : DataMatchResult::kPartialOutOfData);
  }
  return result;
}

DataMatchResult DataScanner::Scan(const DataContext& context) {
  scan_call_count_ += 1;
  DataMatchResult result;
  if (!context.IsValidLocationAndRange()) {
    SetInternalError(context, context.GetInvalidLocationAndRangeErrorText(),
                     &result);
    return result;
  }
  if (!token_range_.IsValid()) {
    token_range_ = DataRange(context.GetLocation(), context.GetLocation());
  }
  size_t bytes_available = context.GetRange().GetEnd() - context.GetLocation();
  const char* cbytes = context.GetCharBytes();
  switch (type_) {
    case kLiteral:
      result = ScanLiteral(cbytes, bytes_available, context);
      break;
    case kName:
      result = ScanName(cbytes, bytes_available, context);
      break;
    case kQuotedString:
      result = ScanQuotedString(cbytes, bytes_available, context);
      break;
    case kSentinel:
      result = ScanSentinel(cbytes, bytes_available, context);
      break;
    case kThroughLiteral:
      result = ScanThroughLiteral(cbytes, bytes_available, context);
      break;
    case kWhitespace:
    case kOptionalWhitespace:
      result = ScanWhitespace(cbytes, bytes_available, context);
      break;
    default:
      SetInternalError(context, "Undefined scanner type", &result);
      break;
  }
  return result;
}

void DataScanner::ResetTokenRange() { token_range_ = DataRange(); }

void DataScanner::Reset() {
  data_ = 0;
  scan_call_count_ = 0;
  ResetTokenRange();
}

std::string DataScanner::GetDescription() const {
  std::string description;
  switch (type_) {
    case kLiteral:
      description = "Literal:'";
      description += literal_or_sentinels_;
      description += "'";
      break;
    case kName:
      description = "Name";
      break;
    case kQuotedString:
      description = "QuotedString";
      break;
    case kSentinel:
      description = "OneOf:'";
      description += literal_or_sentinels_;
      description += "'";
      break;
    case kThroughLiteral:
      description = "ThruLiteral:'";
      description += literal_or_sentinels_;
      description += "'";
      break;
    case kWhitespace:
      description = "Whitespace";
      break;
    case kOptionalWhitespace:
      description = "OptionalWhitespace";
      break;
  }
  return description;
}

std::string DataScanner::GetLiteral() const {
  return type_ == kLiteral || type_ == kThroughLiteral ? literal_or_sentinels_
                                                       : "";
}

std::string DataScanner::GetSentenels() const {
  return type_ == kSentinel ? literal_or_sentinels_ : "";
}

char DataScanner::GetSentinel() const { return type_ == kSentinel ? data_ : 0; }

}  // namespace image_io
}  // namespace photos_editing_formats