C++程序  |  474行  |  11.15 KB

/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "compile/Pseudolocalizer.h"

#include "util/Util.h"

using android::StringPiece;

namespace aapt {

// String basis to generate expansion
static const std::string kExpansionString =
    "one two three "
    "four five six seven eight nine ten eleven twelve thirteen "
    "fourteen fiveteen sixteen seventeen nineteen twenty";

// Special unicode characters to override directionality of the words
static const std::string kRlm = "\u200f";
static const std::string kRlo = "\u202e";
static const std::string kPdf = "\u202c";

// Placeholder marks
static const std::string kPlaceholderOpen = "\u00bb";
static const std::string kPlaceholderClose = "\u00ab";

static const char kArgStart = '{';
static const char kArgEnd = '}';

class PseudoMethodNone : public PseudoMethodImpl {
 public:
  std::string Text(const StringPiece& text) override { return text.to_string(); }
  std::string Placeholder(const StringPiece& text) override { return text.to_string(); }
};

class PseudoMethodBidi : public PseudoMethodImpl {
 public:
  std::string Text(const StringPiece& text) override;
  std::string Placeholder(const StringPiece& text) override;
};

class PseudoMethodAccent : public PseudoMethodImpl {
 public:
  PseudoMethodAccent() : depth_(0), word_count_(0), length_(0) {}
  std::string Start() override;
  std::string End() override;
  std::string Text(const StringPiece& text) override;
  std::string Placeholder(const StringPiece& text) override;

 private:
  size_t depth_;
  size_t word_count_;
  size_t length_;
};

Pseudolocalizer::Pseudolocalizer(Method method) : last_depth_(0) {
  SetMethod(method);
}

void Pseudolocalizer::SetMethod(Method method) {
  switch (method) {
    case Method::kNone:
      impl_ = util::make_unique<PseudoMethodNone>();
      break;
    case Method::kAccent:
      impl_ = util::make_unique<PseudoMethodAccent>();
      break;
    case Method::kBidi:
      impl_ = util::make_unique<PseudoMethodBidi>();
      break;
  }
}

std::string Pseudolocalizer::Text(const StringPiece& text) {
  std::string out;
  size_t depth = last_depth_;
  size_t lastpos, pos;
  const size_t length = text.size();
  const char* str = text.data();
  bool escaped = false;
  for (lastpos = pos = 0; pos < length; pos++) {
    char16_t c = str[pos];
    if (escaped) {
      escaped = false;
      continue;
    }
    if (c == '\'') {
      escaped = true;
      continue;
    }

    if (c == kArgStart) {
      depth++;
    } else if (c == kArgEnd && depth) {
      depth--;
    }

    if (last_depth_ != depth || pos == length - 1) {
      bool pseudo = ((last_depth_ % 2) == 0);
      size_t nextpos = pos;
      if (!pseudo || depth == last_depth_) {
        nextpos++;
      }
      size_t size = nextpos - lastpos;
      if (size) {
        std::string chunk = text.substr(lastpos, size).to_string();
        if (pseudo) {
          chunk = impl_->Text(chunk);
        } else if (str[lastpos] == kArgStart && str[nextpos - 1] == kArgEnd) {
          chunk = impl_->Placeholder(chunk);
        }
        out.append(chunk);
      }
      if (pseudo && depth < last_depth_) {  // End of message
        out.append(impl_->End());
      } else if (!pseudo && depth > last_depth_) {  // Start of message
        out.append(impl_->Start());
      }
      lastpos = nextpos;
      last_depth_ = depth;
    }
  }
  return out;
}

static const char* PseudolocalizeChar(const char c) {
  switch (c) {
    case 'a':
      return "\u00e5";
    case 'b':
      return "\u0253";
    case 'c':
      return "\u00e7";
    case 'd':
      return "\u00f0";
    case 'e':
      return "\u00e9";
    case 'f':
      return "\u0192";
    case 'g':
      return "\u011d";
    case 'h':
      return "\u0125";
    case 'i':
      return "\u00ee";
    case 'j':
      return "\u0135";
    case 'k':
      return "\u0137";
    case 'l':
      return "\u013c";
    case 'm':
      return "\u1e3f";
    case 'n':
      return "\u00f1";
    case 'o':
      return "\u00f6";
    case 'p':
      return "\u00fe";
    case 'q':
      return "\u0051";
    case 'r':
      return "\u0155";
    case 's':
      return "\u0161";
    case 't':
      return "\u0163";
    case 'u':
      return "\u00fb";
    case 'v':
      return "\u0056";
    case 'w':
      return "\u0175";
    case 'x':
      return "\u0445";
    case 'y':
      return "\u00fd";
    case 'z':
      return "\u017e";
    case 'A':
      return "\u00c5";
    case 'B':
      return "\u03b2";
    case 'C':
      return "\u00c7";
    case 'D':
      return "\u00d0";
    case 'E':
      return "\u00c9";
    case 'G':
      return "\u011c";
    case 'H':
      return "\u0124";
    case 'I':
      return "\u00ce";
    case 'J':
      return "\u0134";
    case 'K':
      return "\u0136";
    case 'L':
      return "\u013b";
    case 'M':
      return "\u1e3e";
    case 'N':
      return "\u00d1";
    case 'O':
      return "\u00d6";
    case 'P':
      return "\u00de";
    case 'Q':
      return "\u0071";
    case 'R':
      return "\u0154";
    case 'S':
      return "\u0160";
    case 'T':
      return "\u0162";
    case 'U':
      return "\u00db";
    case 'V':
      return "\u03bd";
    case 'W':
      return "\u0174";
    case 'X':
      return "\u00d7";
    case 'Y':
      return "\u00dd";
    case 'Z':
      return "\u017d";
    case '!':
      return "\u00a1";
    case '?':
      return "\u00bf";
    case '$':
      return "\u20ac";
    default:
      return nullptr;
  }
}

static bool IsPossibleNormalPlaceholderEnd(const char c) {
  switch (c) {
    case 's':
      return true;
    case 'S':
      return true;
    case 'c':
      return true;
    case 'C':
      return true;
    case 'd':
      return true;
    case 'o':
      return true;
    case 'x':
      return true;
    case 'X':
      return true;
    case 'f':
      return true;
    case 'e':
      return true;
    case 'E':
      return true;
    case 'g':
      return true;
    case 'G':
      return true;
    case 'a':
      return true;
    case 'A':
      return true;
    case 'b':
      return true;
    case 'B':
      return true;
    case 'h':
      return true;
    case 'H':
      return true;
    case '%':
      return true;
    case 'n':
      return true;
    default:
      return false;
  }
}

static std::string PseudoGenerateExpansion(const unsigned int length) {
  std::string result = kExpansionString;
  const char* s = result.data();
  if (result.size() < length) {
    result += " ";
    result += PseudoGenerateExpansion(length - result.size());
  } else {
    int ext = 0;
    // Should contain only whole words, so looking for a space
    for (unsigned int i = length + 1; i < result.size(); ++i) {
      ++ext;
      if (s[i] == ' ') {
        break;
      }
    }
    result = result.substr(0, length + ext);
  }
  return result;
}

std::string PseudoMethodAccent::Start() {
  std::string result;
  if (depth_ == 0) {
    result = "[";
  }
  word_count_ = length_ = 0;
  depth_++;
  return result;
}

std::string PseudoMethodAccent::End() {
  std::string result;
  if (length_) {
    result += " ";
    result += PseudoGenerateExpansion(word_count_ > 3 ? length_ : length_ / 2);
  }
  word_count_ = length_ = 0;
  depth_--;
  if (depth_ == 0) {
    result += "]";
  }
  return result;
}

/**
 * Converts characters so they look like they've been localized.
 *
 * Note: This leaves placeholder syntax untouched.
 */
std::string PseudoMethodAccent::Text(const StringPiece& source) {
  const char* s = source.data();
  std::string result;
  const size_t I = source.size();
  bool lastspace = true;
  for (size_t i = 0; i < I; i++) {
    char c = s[i];
    if (c == '%') {
      // Placeholder syntax, no need to pseudolocalize
      std::string chunk;
      bool end = false;
      chunk.append(&c, 1);
      while (!end && i + 1 < I) {
        ++i;
        c = s[i];
        chunk.append(&c, 1);
        if (IsPossibleNormalPlaceholderEnd(c)) {
          end = true;
        } else if (i + 1 < I && c == 't') {
          ++i;
          c = s[i];
          chunk.append(&c, 1);
          end = true;
        }
      }
      // Treat chunk as a placeholder unless it ends with %.
      result += ((c == '%') ? chunk : Placeholder(chunk));
    } else if (c == '<' || c == '&') {
      // html syntax, no need to pseudolocalize
      bool tag_closed = false;
      while (!tag_closed && i < I) {
        if (c == '&') {
          std::string escape_text;
          escape_text.append(&c, 1);
          bool end = false;
          size_t html_code_pos = i;
          while (!end && html_code_pos < I) {
            ++html_code_pos;
            c = s[html_code_pos];
            escape_text.append(&c, 1);
            // Valid html code
            if (c == ';') {
              end = true;
              i = html_code_pos;
            }
            // Wrong html code
            else if (!((c == '#' || (c >= 'a' && c <= 'z') ||
                        (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')))) {
              end = true;
            }
          }
          result += escape_text;
          if (escape_text != "&lt;") {
            tag_closed = true;
          }
          continue;
        }
        if (c == '>') {
          tag_closed = true;
          result.append(&c, 1);
          continue;
        }
        result.append(&c, 1);
        i++;
        c = s[i];
      }
    } else {
      // This is a pure text that should be pseudolocalized
      const char* p = PseudolocalizeChar(c);
      if (p != nullptr) {
        result += p;
      } else {
        bool space = isspace(c);
        if (lastspace && !space) {
          word_count_++;
        }
        lastspace = space;
        result.append(&c, 1);
      }
      // Count only pseudolocalizable chars and delimiters
      length_++;
    }
  }
  return result;
}

std::string PseudoMethodAccent::Placeholder(const StringPiece& source) {
  // Surround a placeholder with brackets
  return kPlaceholderOpen + source.to_string() + kPlaceholderClose;
}

std::string PseudoMethodBidi::Text(const StringPiece& source) {
  const char* s = source.data();
  std::string result;
  bool lastspace = true;
  bool space = true;
  for (size_t i = 0; i < source.size(); i++) {
    char c = s[i];
    space = isspace(c);
    if (lastspace && !space) {
      // Word start
      result += kRlm + kRlo;
    } else if (!lastspace && space) {
      // Word end
      result += kPdf + kRlm;
    }
    lastspace = space;
    result.append(&c, 1);
  }
  if (!lastspace) {
    // End of last word
    result += kPdf + kRlm;
  }
  return result;
}

std::string PseudoMethodBidi::Placeholder(const StringPiece& source) {
  // Surround a placeholder with directionality change sequence
  return kRlm + kRlo + source.to_string() + kPdf + kRlm;
}

}  // namespace aapt