// C++ source  |  371 lines  |  13.11 KB

/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "compile/PseudolocaleGenerator.h"

#include <algorithm>

#include "ResourceTable.h"
#include "ResourceValues.h"
#include "ValueVisitor.h"
#include "compile/Pseudolocalizer.h"
#include "util/Util.h"

using android::StringPiece;
using android::StringPiece16;

namespace aapt {

// The struct that represents both Span objects and UntranslatableSections, so that the two kinds
// of ranges can be sorted and merged into one sequence and processed in a single pass.
//
// Instances are created with aggregate initialization in the order {tag, first_char, last_char},
// so the member order below must not change.
struct UnifiedSpan {
  // Only present for Span objects. If not present, this was an UntranslatableSection.
  Maybe<std::string> tag;

  // The UTF-16 index into the string where this span starts.
  uint32_t first_char;

  // The UTF-16 index into the string where this span ends, inclusive.
  uint32_t last_char;
};

// Orders UnifiedSpans lexicographically: by first_char, then by last_char. The tag does not
// take part in the comparison, so spans covering the same range compare as equivalent.
inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
  if (left.first_char != right.first_char) {
    return left.first_char < right.first_char;
  }
  return left.last_char < right.last_char;
}

// Converts a StringPool::Span into a UnifiedSpan carrying a copy of the span's tag name and the
// same first/last character indices.
inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
  UnifiedSpan unified{*span.name, span.first_char, span.last_char};
  return unified;
}

// Converts an UntranslatableSection into a tag-less UnifiedSpan. The section's `end` has one
// subtracted from it so that the resulting last_char is an inclusive index.
inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
  const uint32_t first = static_cast<uint32_t>(section.start);
  const uint32_t last = static_cast<uint32_t>(section.end) - 1;
  return UnifiedSpan{{}, first, last};
}

// Merges the Span and UntranslatableSections of this StyledString into a single vector of
// UnifiedSpans. This will first check that the Spans are sorted in ascending order.
static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
  // Ensure the Spans are sorted and converted.
  std::vector<UnifiedSpan> sorted_spans;
  sorted_spans.reserve(string.value->spans.size());
  std::transform(string.value->spans.begin(), string.value->spans.end(),
                 std::back_inserter(sorted_spans), SpanToUnifiedSpan);

  // Stable sort to ensure tag sequences like "<b><i>" are preserved.
  std::stable_sort(sorted_spans.begin(), sorted_spans.end());

  // Ensure the UntranslatableSections are sorted and converted.
  std::vector<UnifiedSpan> sorted_untranslatable_sections;
  sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
  std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
                 std::back_inserter(sorted_untranslatable_sections),
                 UntranslatableSectionToUnifiedSpan);
  std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());

  std::vector<UnifiedSpan> merged_spans;
  merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
  auto span_iter = sorted_spans.begin();
  auto untranslatable_iter = sorted_untranslatable_sections.begin();
  while (span_iter != sorted_spans.end() &&
         untranslatable_iter != sorted_untranslatable_sections.end()) {
    if (*span_iter < *untranslatable_iter) {
      merged_spans.push_back(std::move(*span_iter));
      ++span_iter;
    } else {
      merged_spans.push_back(std::move(*untranslatable_iter));
      ++untranslatable_iter;
    }
  }

  while (span_iter != sorted_spans.end()) {
    merged_spans.push_back(std::move(*span_iter));
    ++span_iter;
  }

  while (untranslatable_iter != sorted_untranslatable_sections.end()) {
    merged_spans.push_back(std::move(*untranslatable_iter));
    ++untranslatable_iter;
  }
  return merged_spans;
}

// Produces a pseudolocalized copy of `string`.
//
// The text outside of untranslatable sections is run through a Pseudolocalizer configured with
// `method`; text inside untranslatable sections is copied verbatim. The first_char/last_char
// indices of every styled Span are rewritten so they refer to the same content in the new text.
// The resulting styled string is interned in `pool`.
std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
                                                         Pseudolocalizer::Method method,
                                                         StringPool* pool) {
  Pseudolocalizer localizer(method);

  // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
  // This will effectively subdivide the string into multiple sections that can be individually
  // pseudolocalized, while keeping the span indices synchronized.
  std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);

  // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
  // runtime. So we will do all our processing in UTF-16, then convert back.
  const std::u16string text16 = util::Utf8ToUtf16(*string->value->str);

  // Convenient wrapper around the text that allows us to work with StringPieces.
  const StringPiece16 text(text16);

  // The new string.
  std::string new_string = localizer.Start();

  // The stack that keeps track of what nested Span we're in (indices into merged_spans).
  std::vector<size_t> span_stack;

  // The current position in the original text.
  uint32_t cursor = 0u;

  // The current position in the new text, in UTF-16 units.
  uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
                                             new_string.size(), false);

  // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
  bool translatable = true;

  // Consumes `length` UTF-16 units of the original text starting at `cursor`, pseudolocalizes
  // them if we are currently in translatable text, appends the result to the new string and
  // advances both cursors.
  auto append_segment = [&](uint32_t length) {
    const StringPiece16 substr = text.substr(cursor, length);
    cursor += substr.size();

    std::string new_substr = util::Utf16ToUtf8(substr);
    if (translatable) {
      new_substr = localizer.Text(new_substr);
    }
    new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
                                       new_substr.size(), false);
    new_string += new_substr;
  };

  size_t span_idx = 0u;
  while (span_idx < merged_spans.size() || !span_stack.empty()) {
    UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
    UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];

    // last_char is inclusive, so a span that starts exactly on the parent's last character is
    // still nested inside the parent; hence `>=` rather than `>`.
    if (span != nullptr &&
        (parent_span == nullptr || parent_span->last_char >= span->first_char)) {
      // There is no parent, or this span is the child of the parent.
      // Pseudolocalize all the text until this span.
      append_segment(span->first_char - cursor);

      // Rewrite the first_char.
      span->first_char = new_cursor;
      if (!span->tag) {
        // An untranslatable section has begun!
        translatable = false;
      }
      span_stack.push_back(span_idx);
      ++span_idx;
      continue;
    }

    // Either there are no more spans (so the loop condition guarantees a non-empty stack), or
    // the next span is not a child of the parent. In both cases parent_span is non-null here.
    // Emit the rest of the parent's text and pop it off the stack.
    append_segment(parent_span->last_char - cursor + 1);

    parent_span->last_char = new_cursor - 1;
    if (!parent_span->tag) {
      // An end to an untranslatable section: mirror the check made when the section began.
      translatable = true;
    }
    span_stack.pop_back();
  }

  // Finish the pseudolocalization at the end of the string.
  new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
  new_string += localizer.End();

  StyleString localized;
  localized.str = std::move(new_string);

  // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
  for (UnifiedSpan& span : merged_spans) {
    if (span.tag) {
      localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
    }
  }
  return util::make_unique<StyledString>(pool->MakeRef(localized));
}

namespace {

// Visits a resource value and, for the value kinds handled below (Plural, String, StyledString),
// builds a pseudolocalized copy of it. Value kinds without an override here leave both `value`
// and `item` untouched.
class Visitor : public RawValueVisitor {
 public:
  // Either value or item will be populated upon visiting the value.
  // `value` is set when a Plural is visited; `item` is set for String and StyledString.
  std::unique_ptr<Value> value;
  std::unique_ptr<Item> item;

  Visitor(StringPool* pool, Pseudolocalizer::Method method)
      : pool_(pool), method_(method), localizer_(method) {}

  // Builds a new Plural whose entries are the pseudolocalized versions of the original entries.
  // Entries that could not be pseudolocalized are cloned unchanged; empty entries stay empty.
  void Visit(Plural* plural) override {
    std::unique_ptr<Plural> localized = util::make_unique<Plural>();
    for (size_t i = 0; i < plural->values.size(); i++) {
      // A fresh visitor per entry, so each entry gets its own Pseudolocalizer state.
      Visitor sub_visitor(pool_, method_);
      if (plural->values[i]) {
        plural->values[i]->Accept(&sub_visitor);
        // Plural entries are Items, and the Item visits below report their result in `item`,
        // so `item` (not `value`) is what must be checked before it is moved.
        if (sub_visitor.item) {
          localized->values[i] = std::move(sub_visitor.item);
        } else {
          localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
        }
      }
    }
    localized->SetSource(plural->GetSource());
    localized->SetWeak(true);
    value = std::move(localized);
  }

  // Pseudolocalizes a plain string, copying its untranslatable sections verbatim.
  void Visit(String* string) override {
    const StringPiece original_string = *string->value;
    std::string result = localizer_.Start();

    // Pseudolocalize only the translatable sections.
    size_t start = 0u;
    for (const UntranslatableSection& section : string->untranslatable_sections) {
      // Pseudolocalize the content before the untranslatable section.
      const size_t len = section.start - start;
      if (len > 0u) {
        result += localizer_.Text(original_string.substr(start, len));
      }

      // Copy the untranslatable content.
      result += original_string.substr(section.start, section.end - section.start);
      start = section.end;
    }

    // Pseudolocalize the content after the last untranslatable section.
    if (start != original_string.size()) {
      const size_t len = original_string.size() - start;
      result += localizer_.Text(original_string.substr(start, len));
    }
    result += localizer_.End();

    std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
    localized->SetSource(string->GetSource());
    localized->SetWeak(true);
    item = std::move(localized);
  }

  // Pseudolocalizes a styled string via PseudolocalizeStyledString, which also rewrites the
  // span indices to match the new text.
  void Visit(StyledString* string) override {
    item = PseudolocalizeStyledString(string, method_, pool_);
    item->SetSource(string->GetSource());
    item->SetWeak(true);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(Visitor);

  // String pool in which the generated strings are interned.
  StringPool* pool_;
  // Pseudolocalization method, forwarded to sub-visitors and to PseudolocalizeStyledString.
  Pseudolocalizer::Method method_;
  // Localizer used for plain String values.
  Pseudolocalizer localizer_;
};

// Returns a copy of `base` whose language/country are replaced by the pseudo-locale that
// corresponds to method `m`: "en"/"XA" for kAccent and "ar"/"XB" for kBidi. For any other
// method the copy is returned unmodified.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m) {
  ConfigDescription result = base;
  if (m == Pseudolocalizer::Method::kAccent) {
    result.language[0] = 'e';
    result.language[1] = 'n';
    result.country[0] = 'X';
    result.country[1] = 'A';
  } else if (m == Pseudolocalizer::Method::kBidi) {
    result.language[0] = 'a';
    result.language[1] = 'r';
    result.country[0] = 'X';
    result.country[1] = 'B';
  }
  return result;
}

// Generates the pseudolocalized counterpart of `original_value` for `method` and stores it in
// `entry` under the matching pseudo-locale configuration (same product). Nothing is stored if
// the value could not be pseudolocalized, or if the entry already has a value for that
// configuration.
void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
                            ResourceConfigValue* original_value,
                            StringPool* pool, ResourceEntry* entry) {
  Visitor visitor(pool, method);
  original_value->value->Accept(&visitor);

  // The visitor reports its result in either `value` or `item`; prefer `value`.
  std::unique_ptr<Value> generated;
  if (visitor.value) {
    generated = std::move(visitor.value);
  } else if (visitor.item) {
    generated = std::move(visitor.item);
  }

  if (generated == nullptr) {
    return;
  }

  const ConfigDescription pseudo_config =
      ModifyConfigForPseudoLocale(original_value->config, method);
  ResourceConfigValue* target = entry->FindOrCreateValue(pseudo_config, original_value->product);
  if (target->value) {
    // An explicitly defined pseudo-localization takes precedence over the generated one.
    return;
  }
  target->value = std::move(generated);
}

// Returns true if `config_value` may be pseudolocalized: its configuration must not differ from
// the default configuration in the locale axis, and its value must be translatable.
static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
  const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
  const bool has_explicit_locale = (diff & ConfigDescription::CONFIG_LOCALE) != 0;
  return !has_explicit_locale && config_value->value->IsTranslatable();
}

}  // namespace

// Walks every entry of every type of every package in `table` and, for each pseudolocalizable
// value, generates its accent and bidi pseudo-locale variants (in that order). Always returns
// true.
bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
  StringPool* const pool = &table->string_pool;
  for (auto& package : table->packages) {
    for (auto& type : package->types) {
      for (auto& entry : type->entries) {
        // Snapshot the candidates first: generating values adds new config values to the entry.
        const std::vector<ResourceConfigValue*> candidates =
            entry->FindValuesIf(IsPseudolocalizable);
        ResourceEntry* const target_entry = entry.get();
        for (ResourceConfigValue* candidate : candidates) {
          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, candidate, pool, target_entry);
          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, candidate, pool, target_entry);
        }
      }
    }
  }
  return true;
}

}  // namespace aapt