// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/renderer/translate/translate_helper.h"

#if defined(CLD2_DYNAMIC_MODE)
#include <stdint.h>

#include <limits>
#endif

#include "base/bind.h"
#include "base/compiler_specific.h"
#if defined(CLD2_DYNAMIC_MODE)
#include "base/files/memory_mapped_file.h"
#endif
#include "base/logging.h"
#include "base/message_loop/message_loop.h"
#include "base/strings/string16.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/renderer/isolated_world_ids.h"
#include "components/translate/content/common/translate_messages.h"
#include "components/translate/core/common/translate_constants.h"
#include "components/translate/core/common/translate_metrics.h"
#include "components/translate/core/common/translate_util.h"
#include "components/translate/core/language_detection/language_detection_util.h"
#include "content/public/renderer/render_view.h"
#include "extensions/common/constants.h"
#include "extensions/renderer/extension_groups.h"
#include "ipc/ipc_platform_file.h"
#if defined(CLD2_DYNAMIC_MODE)
#include "content/public/common/url_constants.h"
#include "third_party/cld_2/src/public/compact_lang_det.h"
#endif
#include "third_party/WebKit/public/web/WebDocument.h"
#include "third_party/WebKit/public/web/WebElement.h"
#include "third_party/WebKit/public/web/WebFrame.h"
#include "third_party/WebKit/public/web/WebNode.h"
#include "third_party/WebKit/public/web/WebNodeList.h"
#include "third_party/WebKit/public/web/WebScriptSource.h"
#include "third_party/WebKit/public/web/WebView.h"
#include "third_party/WebKit/public/web/WebWidget.h"
#include "url/gurl.h"
#include "v8/include/v8.h"

using base::ASCIIToUTF16;
using blink::WebDocument;
using blink::WebElement;
using blink::WebFrame;
using blink::WebNode;
using blink::WebNodeList;
using blink::WebScriptSource;
using blink::WebSecurityOrigin;
using blink::WebString;
using blink::WebVector;
using blink::WebView;

namespace {

// The delay in milliseconds that we'll wait before checking to see if the
// translate library injected in the page is ready.
const int kTranslateInitCheckDelayMs = 150;

// The maximum number of times we'll check to see if the translate library
// injected in the page is ready.
const int kMaxTranslateInitCheckAttempts = 5;

// The delay we wait in milliseconds before checking whether the translation has
// finished.
const int kTranslateStatusCheckDelayMs = 400;

// Language name passed to the Translate element for it to detect the language.
const char kAutoDetectionLanguage[] = "auto";

// Isolated world sets following content-security-policy.
const char kContentSecurityPolicy[] = "script-src 'self' 'unsafe-eval'";

}  // namespace

#if defined(CLD2_DYNAMIC_MODE)
// The mmap for the CLD2 data must be held forever once it is available in the
// process. This is declared static in the translate_helper.h.
base::LazyInstance<TranslateHelper::CLDMmapWrapper>::Leaky TranslateHelper::s_cld_mmap_ = LAZY_INSTANCE_INITIALIZER; #endif //////////////////////////////////////////////////////////////////////////////// // TranslateHelper, public: // TranslateHelper::TranslateHelper(content::RenderView* render_view) : content::RenderViewObserver(render_view), page_id_(-1), translation_pending_(false), weak_method_factory_(this) #if defined(CLD2_DYNAMIC_MODE) ,cld2_data_file_polling_started_(false), cld2_data_file_polling_canceled_(false), deferred_page_capture_(false), deferred_page_id_(-1), deferred_contents_(ASCIIToUTF16("")) #endif { } TranslateHelper::~TranslateHelper() { CancelPendingTranslation(); #if defined(CLD2_DYNAMIC_MODE) CancelCLD2DataFilePolling(); #endif } void TranslateHelper::PrepareForUrl(const GURL& url) { #if defined(CLD2_DYNAMIC_MODE) deferred_page_capture_ = false; deferred_contents_.clear(); if (cld2_data_file_polling_started_) return; // TODO(andrewhayden): Refactor translate_manager.cc's IsTranslatableURL to // components/translate/core/common/translate_util.cc, and ignore any URL // that fails that check. This will require moving unit tests and rewiring // other function calls as well, so for now replicate the logic here. if (url.is_empty()) return; if (url.SchemeIs(content::kChromeUIScheme)) return; if (url.SchemeIs(content::kChromeDevToolsScheme)) return; if (url.SchemeIs(url::kFtpScheme)) return; #if defined(OS_CHROMEOS) if (url.SchemeIs(extensions::kExtensionScheme) && url.DomainIs(file_manager::kFileManagerAppId)) return; #endif // Start polling for CLD data. 
cld2_data_file_polling_started_ = true; TranslateHelper::SendCLD2DataFileRequest(0, 1000); #endif } #if defined(CLD2_DYNAMIC_MODE) void TranslateHelper::DeferPageCaptured(const int page_id, const base::string16& contents) { deferred_page_capture_ = true; deferred_page_id_ = page_id; deferred_contents_ = contents; } #endif void TranslateHelper::PageCaptured(int page_id, const base::string16& contents) { // Get the document language as set by WebKit from the http-equiv // meta tag for "content-language". This may or may not also // have a value derived from the actual Content-Language HTTP // header. The two actually have different meanings (despite the // original intent of http-equiv to be an equivalent) with the former // being the language of the document and the latter being the // language of the intended audience (a distinction really only // relevant for things like langauge textbooks). This distinction // shouldn't affect translation. WebFrame* main_frame = GetMainFrame(); if (!main_frame || render_view()->GetPageId() != page_id) return; // TODO(andrewhayden): UMA insertion point here: Track if data is available. // TODO(andrewhayden): Retry insertion point here, retry till data available. #if defined(CLD2_DYNAMIC_MODE) if (!CLD2::isDataLoaded()) { // We're in dynamic mode and CLD data isn't loaded. Retry when CLD data // is loaded, if ever. TranslateHelper::DeferPageCaptured(page_id, contents); return; } #endif page_id_ = page_id; WebDocument document = main_frame->document(); std::string content_language = document.contentLanguage().utf8(); WebElement html_element = document.documentElement(); std::string html_lang; // |html_element| can be null element, e.g. in // BrowserTest.WindowOpenClose. 
if (!html_element.isNull()) html_lang = html_element.getAttribute("lang").utf8(); std::string cld_language; bool is_cld_reliable; std::string language = translate::DeterminePageLanguage( content_language, html_lang, contents, &cld_language, &is_cld_reliable); if (language.empty()) return; language_determined_time_ = base::TimeTicks::Now(); GURL url(document.url()); LanguageDetectionDetails details; details.time = base::Time::Now(); details.url = url; details.content_language = content_language; details.cld_language = cld_language; details.is_cld_reliable = is_cld_reliable; details.html_root_language = html_lang; details.adopted_language = language; // TODO(hajimehoshi): If this affects performance, it should be set only if // translate-internals tab exists. details.contents = contents; Send(new ChromeViewHostMsg_TranslateLanguageDetermined( routing_id(), details, IsTranslationAllowed(&document) && !language.empty())); } void TranslateHelper::CancelPendingTranslation() { weak_method_factory_.InvalidateWeakPtrs(); translation_pending_ = false; source_lang_.clear(); target_lang_.clear(); #if defined(CLD2_DYNAMIC_MODE) CancelCLD2DataFilePolling(); #endif } //////////////////////////////////////////////////////////////////////////////// // TranslateHelper, protected: // bool TranslateHelper::IsTranslateLibAvailable() { return ExecuteScriptAndGetBoolResult( "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && " "typeof cr.googleTranslate.translate == 'function'", false); } bool TranslateHelper::IsTranslateLibReady() { return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false); } bool TranslateHelper::HasTranslationFinished() { return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true); } bool TranslateHelper::HasTranslationFailed() { return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true); } bool TranslateHelper::StartTranslation() { std::string script = "cr.googleTranslate.translate('" + source_lang_ + 
"','" + target_lang_ + "')"; return ExecuteScriptAndGetBoolResult(script, false); } std::string TranslateHelper::GetOriginalPageLanguage() { return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang"); } base::TimeDelta TranslateHelper::AdjustDelay(int delayInMs) { // Just converts |delayInMs| without any modification in practical cases. // Tests will override this function to return modified value. return base::TimeDelta::FromMilliseconds(delayInMs); } void TranslateHelper::ExecuteScript(const std::string& script) { WebFrame* main_frame = GetMainFrame(); if (!main_frame) return; WebScriptSource source = WebScriptSource(ASCIIToUTF16(script)); main_frame->executeScriptInIsolatedWorld( chrome::ISOLATED_WORLD_ID_TRANSLATE, &source, 1, extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS); } bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string& script, bool fallback) { WebFrame* main_frame = GetMainFrame(); if (!main_frame) return fallback; v8::HandleScope handle_scope(v8::Isolate::GetCurrent()); WebVector<v8::Local<v8::Value> > results; WebScriptSource source = WebScriptSource(ASCIIToUTF16(script)); main_frame->executeScriptInIsolatedWorld( chrome::ISOLATED_WORLD_ID_TRANSLATE, &source, 1, extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS, &results); if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsBoolean()) { NOTREACHED(); return fallback; } return results[0]->BooleanValue(); } std::string TranslateHelper::ExecuteScriptAndGetStringResult( const std::string& script) { WebFrame* main_frame = GetMainFrame(); if (!main_frame) return std::string(); v8::HandleScope handle_scope(v8::Isolate::GetCurrent()); WebVector<v8::Local<v8::Value> > results; WebScriptSource source = WebScriptSource(ASCIIToUTF16(script)); main_frame->executeScriptInIsolatedWorld( chrome::ISOLATED_WORLD_ID_TRANSLATE, &source, 1, extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS, &results); if (results.size() != 1 || results[0].IsEmpty() || 
!results[0]->IsString()) { NOTREACHED(); return std::string(); } v8::Local<v8::String> v8_str = results[0]->ToString(); int length = v8_str->Utf8Length() + 1; scoped_ptr<char[]> str(new char[length]); v8_str->WriteUtf8(str.get(), length); return std::string(str.get()); } double TranslateHelper::ExecuteScriptAndGetDoubleResult( const std::string& script) { WebFrame* main_frame = GetMainFrame(); if (!main_frame) return 0.0; v8::HandleScope handle_scope(v8::Isolate::GetCurrent()); WebVector<v8::Local<v8::Value> > results; WebScriptSource source = WebScriptSource(ASCIIToUTF16(script)); main_frame->executeScriptInIsolatedWorld( chrome::ISOLATED_WORLD_ID_TRANSLATE, &source, 1, extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS, &results); if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsNumber()) { NOTREACHED(); return 0.0; } return results[0]->NumberValue(); } //////////////////////////////////////////////////////////////////////////////// // TranslateHelper, private: // // static bool TranslateHelper::IsTranslationAllowed(WebDocument* document) { WebElement head = document->head(); if (head.isNull() || !head.hasChildNodes()) return true; const WebString meta(ASCIIToUTF16("meta")); const WebString name(ASCIIToUTF16("name")); const WebString google(ASCIIToUTF16("google")); const WebString value(ASCIIToUTF16("value")); const WebString content(ASCIIToUTF16("content")); WebNodeList children = head.childNodes(); for (size_t i = 0; i < children.length(); ++i) { WebNode node = children.item(i); if (!node.isElementNode()) continue; WebElement element = node.to<WebElement>(); // Check if a tag is <meta>. if (!element.hasTagName(meta)) continue; // Check if the tag contains name="google". WebString attribute = element.getAttribute(name); if (attribute.isNull() || attribute != google) continue; // Check if the tag contains value="notranslate", or content="notranslate". 
attribute = element.getAttribute(value); if (attribute.isNull()) attribute = element.getAttribute(content); if (attribute.isNull()) continue; if (LowerCaseEqualsASCII(attribute, "notranslate")) return false; } return true; } bool TranslateHelper::OnMessageReceived(const IPC::Message& message) { bool handled = true; IPC_BEGIN_MESSAGE_MAP(TranslateHelper, message) IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage, OnTranslatePage) IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation) #if defined(CLD2_DYNAMIC_MODE) IPC_MESSAGE_HANDLER(ChromeViewMsg_CLDDataAvailable, OnCLDDataAvailable); #endif IPC_MESSAGE_UNHANDLED(handled = false) IPC_END_MESSAGE_MAP() return handled; } void TranslateHelper::OnTranslatePage(int page_id, const std::string& translate_script, const std::string& source_lang, const std::string& target_lang) { WebFrame* main_frame = GetMainFrame(); if (!main_frame || page_id_ != page_id || render_view()->GetPageId() != page_id) return; // We navigated away, nothing to do. // A similar translation is already under way, nothing to do. if (translation_pending_ && target_lang_ == target_lang) return; // Any pending translation is now irrelevant. CancelPendingTranslation(); // Set our states. translation_pending_ = true; // If the source language is undetermined, we'll let the translate element // detect it. source_lang_ = (source_lang != translate::kUnknownLanguageCode) ? source_lang : kAutoDetectionLanguage; target_lang_ = target_lang; translate::ReportUserActionDuration(language_determined_time_, base::TimeTicks::Now()); GURL url(main_frame->document().url()); translate::ReportPageScheme(url.scheme()); // Set up v8 isolated world with proper content-security-policy and // security-origin. 
WebFrame* frame = GetMainFrame(); if (frame) { frame->setIsolatedWorldContentSecurityPolicy( chrome::ISOLATED_WORLD_ID_TRANSLATE, WebString::fromUTF8(kContentSecurityPolicy)); GURL security_origin = translate::GetTranslateSecurityOrigin(); frame->setIsolatedWorldSecurityOrigin( chrome::ISOLATED_WORLD_ID_TRANSLATE, WebSecurityOrigin::create(security_origin)); } if (!IsTranslateLibAvailable()) { // Evaluate the script to add the translation related method to the global // context of the page. ExecuteScript(translate_script); DCHECK(IsTranslateLibAvailable()); } TranslatePageImpl(0); } void TranslateHelper::OnRevertTranslation(int page_id) { if (page_id_ != page_id || render_view()->GetPageId() != page_id) return; // We navigated away, nothing to do. if (!IsTranslateLibAvailable()) { NOTREACHED(); return; } CancelPendingTranslation(); ExecuteScript("cr.googleTranslate.revert()"); } void TranslateHelper::CheckTranslateStatus() { // If this is not the same page, the translation has been canceled. If the // view is gone, the page is closing. if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView()) return; // First check if there was an error. if (HasTranslationFailed()) { // TODO(toyoshim): Check |errorCode| of translate.js and notify it here. NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR); return; // There was an error. } if (HasTranslationFinished()) { std::string actual_source_lang; // Translation was successfull, if it was auto, retrieve the source // language the Translate Element detected. 
if (source_lang_ == kAutoDetectionLanguage) { actual_source_lang = GetOriginalPageLanguage(); if (actual_source_lang.empty()) { NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE); return; } else if (actual_source_lang == target_lang_) { NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES); return; } } else { actual_source_lang = source_lang_; } if (!translation_pending_) { NOTREACHED(); return; } translation_pending_ = false; // Check JavaScript performance counters for UMA reports. translate::ReportTimeToTranslate( ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime")); // Notify the browser we are done. render_view()->Send(new ChromeViewHostMsg_PageTranslated( render_view()->GetRoutingID(), render_view()->GetPageId(), actual_source_lang, target_lang_, TranslateErrors::NONE)); return; } // The translation is still pending, check again later. base::MessageLoop::current()->PostDelayedTask( FROM_HERE, base::Bind(&TranslateHelper::CheckTranslateStatus, weak_method_factory_.GetWeakPtr()), AdjustDelay(kTranslateStatusCheckDelayMs)); } void TranslateHelper::TranslatePageImpl(int count) { DCHECK_LT(count, kMaxTranslateInitCheckAttempts); if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView()) return; if (!IsTranslateLibReady()) { // The library is not ready, try again later, unless we have tried several // times unsucessfully already. if (++count >= kMaxTranslateInitCheckAttempts) { NotifyBrowserTranslationFailed(TranslateErrors::INITIALIZATION_ERROR); return; } base::MessageLoop::current()->PostDelayedTask( FROM_HERE, base::Bind(&TranslateHelper::TranslatePageImpl, weak_method_factory_.GetWeakPtr(), count), AdjustDelay(count * kTranslateInitCheckDelayMs)); return; } // The library is loaded, and ready for translation now. // Check JavaScript performance counters for UMA reports. 
translate::ReportTimeToBeReady( ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime")); translate::ReportTimeToLoad( ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime")); if (!StartTranslation()) { NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR); return; } // Check the status of the translation. base::MessageLoop::current()->PostDelayedTask( FROM_HERE, base::Bind(&TranslateHelper::CheckTranslateStatus, weak_method_factory_.GetWeakPtr()), AdjustDelay(kTranslateStatusCheckDelayMs)); } void TranslateHelper::NotifyBrowserTranslationFailed( TranslateErrors::Type error) { translation_pending_ = false; // Notify the browser there was an error. render_view()->Send(new ChromeViewHostMsg_PageTranslated( render_view()->GetRoutingID(), page_id_, source_lang_, target_lang_, error)); } WebFrame* TranslateHelper::GetMainFrame() { WebView* web_view = render_view()->GetWebView(); // When the tab is going to be closed, the web_view can be NULL. if (!web_view) return NULL; return web_view->mainFrame(); } #if defined(CLD2_DYNAMIC_MODE) void TranslateHelper::CancelCLD2DataFilePolling() { cld2_data_file_polling_canceled_ = true; } void TranslateHelper::SendCLD2DataFileRequest(const int delay_millis, const int next_delay_millis) { // Terminate immediately if told to stop polling. if (cld2_data_file_polling_canceled_) return; // Terminate immediately if data is already loaded. if (CLD2::isDataLoaded()) return; // Else, send the IPC message to the browser process requesting the data... Send(new ChromeViewHostMsg_NeedCLDData(routing_id())); // ... and enqueue another delayed task to call again. This will start a // chain of polling that will last until the pointer stops being NULL, // which is the right thing to do. // NB: In the great majority of cases, the data file will be available and // the very first delayed task will be a no-op that terminates the chain. 
// It's only while downloading the file that this will chain for a // nontrivial amount of time. // Use a weak pointer to avoid keeping this helper object around forever. base::MessageLoop::current()->PostDelayedTask( FROM_HERE, base::Bind(&TranslateHelper::SendCLD2DataFileRequest, weak_method_factory_.GetWeakPtr(), next_delay_millis, next_delay_millis), base::TimeDelta::FromMilliseconds(delay_millis)); } void TranslateHelper::OnCLDDataAvailable( const IPC::PlatformFileForTransit ipc_file_handle, const uint64 data_offset, const uint64 data_length) { LoadCLDDData(IPC::PlatformFileForTransitToFile(ipc_file_handle), data_offset, data_length); if (deferred_page_capture_ && CLD2::isDataLoaded()) { deferred_page_capture_ = false; // Don't do this a second time. PageCaptured(deferred_page_id_, deferred_contents_); deferred_page_id_ = -1; // Clean up for sanity deferred_contents_.clear(); // Clean up for sanity } } void TranslateHelper::LoadCLDDData( base::File file, const uint64 data_offset, const uint64 data_length) { // Terminate immediately if told to stop polling. if (cld2_data_file_polling_canceled_) return; // Terminate immediately if data is already loaded. 
if (CLD2::isDataLoaded()) return; if (!file.IsValid()) { LOG(ERROR) << "Can't find the CLD data file."; return; } // mmap the file s_cld_mmap_.Get().value = new base::MemoryMappedFile(); bool initialized = s_cld_mmap_.Get().value->Initialize(file.Pass()); if (!initialized) { LOG(ERROR) << "mmap initialization failed"; delete s_cld_mmap_.Get().value; s_cld_mmap_.Get().value = NULL; return; } // Sanity checks uint64 max_int32 = std::numeric_limits<int32>::max(); if (data_length + data_offset > s_cld_mmap_.Get().value->length() || data_length > max_int32) { // max signed 32 bit integer LOG(ERROR) << "Illegal mmap config: data_offset=" << data_offset << ", data_length=" << data_length << ", mmap->length()=" << s_cld_mmap_.Get().value->length(); delete s_cld_mmap_.Get().value; s_cld_mmap_.Get().value = NULL; return; } // Initialize the CLD subsystem... and it's all done! const uint8* data_ptr = s_cld_mmap_.Get().value->data() + data_offset; CLD2::loadDataFromRawAddress(data_ptr, data_length); DCHECK(CLD2::isDataLoaded()) << "Failed to load CLD data from mmap"; } #endif