// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/browser/speech/speech_input_manager.h"
#include <map>
#include <string>
#include "base/lazy_instance.h"
#include "base/memory/ref_counted.h"
#include "base/synchronization/lock.h"
#include "base/threading/thread_restrictions.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/platform_util.h"
#include "chrome/browser/prefs/pref_service.h"
#include "chrome/browser/speech/speech_input_bubble_controller.h"
#include "chrome/browser/tab_contents/tab_util.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/common/pref_names.h"
#include "content/browser/browser_thread.h"
#include "content/browser/speech/speech_recognizer.h"
#include "grit/generated_resources.h"
#include "media/audio/audio_manager.h"
#include "ui/base/l10n/l10n_util.h"
#if defined(OS_WIN)
#include "chrome/installer/util/wmi.h"
#endif
namespace speech_input {
namespace {
// Asynchronously fetches the PC and audio hardware/driver info if
// the user has opted into UMA. This information is sent with speech input
// requests to the server for identifying and improving quality issues with
// specific device configurations.
class OptionalRequestInfo
: public base::RefCountedThreadSafe<OptionalRequestInfo> {
public:
OptionalRequestInfo() : can_report_metrics_(false) {}
void Refresh() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
// UMA opt-in can be checked only from the UI thread, so switch to that.
BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
NewRunnableMethod(this,
&OptionalRequestInfo::CheckUMAAndGetHardwareInfo));
}
void CheckUMAAndGetHardwareInfo() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
if (g_browser_process->local_state()->GetBoolean(
prefs::kMetricsReportingEnabled)) {
// Access potentially slow OS calls from the FILE thread.
BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE,
NewRunnableMethod(this, &OptionalRequestInfo::GetHardwareInfo));
}
}
void GetHardwareInfo() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
base::AutoLock lock(lock_);
can_report_metrics_ = true;
#if defined(OS_WIN)
value_ = UTF16ToUTF8(
installer::WMIComputerSystem::GetModel() + L"|" +
AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
#else // defined(OS_WIN)
value_ = UTF16ToUTF8(
AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
#endif // defined(OS_WIN)
}
std::string value() {
base::AutoLock lock(lock_);
return value_;
}
bool can_report_metrics() {
base::AutoLock lock(lock_);
return can_report_metrics_;
}
private:
base::Lock lock_;
std::string value_;
bool can_report_metrics_;
DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo);
};
class SpeechInputManagerImpl : public SpeechInputManager,
public SpeechInputBubbleControllerDelegate,
public SpeechRecognizerDelegate {
public:
// SpeechInputManager methods.
virtual void StartRecognition(SpeechInputManagerDelegate* delegate,
int caller_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect,
const std::string& language,
const std::string& grammar,
const std::string& origin_url);
virtual void CancelRecognition(int caller_id);
virtual void StopRecording(int caller_id);
virtual void CancelAllRequestsWithDelegate(
SpeechInputManagerDelegate* delegate);
// SpeechRecognizer::Delegate methods.
virtual void DidStartReceivingAudio(int caller_id);
virtual void SetRecognitionResult(int caller_id,
bool error,
const SpeechInputResultArray& result);
virtual void DidCompleteRecording(int caller_id);
virtual void DidCompleteRecognition(int caller_id);
virtual void OnRecognizerError(int caller_id,
SpeechRecognizer::ErrorCode error);
virtual void DidCompleteEnvironmentEstimation(int caller_id);
virtual void SetInputVolume(int caller_id, float volume, float noise_volume);
// SpeechInputBubbleController::Delegate methods.
virtual void InfoBubbleButtonClicked(int caller_id,
SpeechInputBubble::Button button);
virtual void InfoBubbleFocusChanged(int caller_id);
private:
struct SpeechInputRequest {
SpeechInputManagerDelegate* delegate;
scoped_refptr<SpeechRecognizer> recognizer;
bool is_active; // Set to true when recording or recognition is going on.
};
// Private constructor to enforce singleton.
friend struct base::DefaultLazyInstanceTraits<SpeechInputManagerImpl>;
SpeechInputManagerImpl();
virtual ~SpeechInputManagerImpl();
bool HasPendingRequest(int caller_id) const;
SpeechInputManagerDelegate* GetDelegate(int caller_id) const;
void CancelRecognitionAndInformDelegate(int caller_id);
// Starts/restarts recognition for an existing request.
void StartRecognitionForRequest(int caller_id);
typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap;
SpeechRecognizerMap requests_;
int recording_caller_id_;
scoped_refptr<SpeechInputBubbleController> bubble_controller_;
scoped_refptr<OptionalRequestInfo> optional_request_info_;
};
base::LazyInstance<SpeechInputManagerImpl> g_speech_input_manager_impl(
base::LINKER_INITIALIZED);
} // namespace
SpeechInputManager* SpeechInputManager::Get() {
return g_speech_input_manager_impl.Pointer();
}
void SpeechInputManager::ShowAudioInputSettings() {
// Since AudioManager::ShowAudioInputSettings can potentially launch external
// processes, do that in the FILE thread to not block the calling threads.
if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
BrowserThread::PostTask(
BrowserThread::FILE, FROM_HERE,
NewRunnableFunction(&SpeechInputManager::ShowAudioInputSettings));
return;
}
DCHECK(AudioManager::GetAudioManager()->CanShowAudioInputSettings());
if (AudioManager::GetAudioManager()->CanShowAudioInputSettings())
AudioManager::GetAudioManager()->ShowAudioInputSettings();
}
SpeechInputManagerImpl::SpeechInputManagerImpl()
: recording_caller_id_(0),
bubble_controller_(new SpeechInputBubbleController(
ALLOW_THIS_IN_INITIALIZER_LIST(this))) {
}
SpeechInputManagerImpl::~SpeechInputManagerImpl() {
while (requests_.begin() != requests_.end())
CancelRecognition(requests_.begin()->first);
}
bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const {
return requests_.find(caller_id) != requests_.end();
}
SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate(
int caller_id) const {
return requests_.find(caller_id)->second.delegate;
}
void SpeechInputManagerImpl::StartRecognition(
SpeechInputManagerDelegate* delegate,
int caller_id,
int render_process_id,
int render_view_id,
const gfx::Rect& element_rect,
const std::string& language,
const std::string& grammar,
const std::string& origin_url) {
DCHECK(!HasPendingRequest(caller_id));
bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id,
element_rect);
if (!optional_request_info_.get()) {
optional_request_info_ = new OptionalRequestInfo();
// Since hardware info is optional with speech input requests, we start an
// asynchronous fetch here and move on with recording audio. This first
// speech input request would send an empty string for hardware info and
// subsequent requests may have the hardware info available if the fetch
// completed before them. This way we don't end up stalling the user with
// a long wait and disk seeks when they click on a UI element and start
// speaking.
optional_request_info_->Refresh();
}
SpeechInputRequest* request = &requests_[caller_id];
request->delegate = delegate;
request->recognizer = new SpeechRecognizer(
this, caller_id, language, grammar, optional_request_info_->value(),
optional_request_info_->can_report_metrics() ? origin_url : "");
request->is_active = false;
StartRecognitionForRequest(caller_id);
}
void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) {
DCHECK(HasPendingRequest(caller_id));
// If we are currently recording audio for another caller, abort that cleanly.
if (recording_caller_id_)
CancelRecognitionAndInformDelegate(recording_caller_id_);
if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) {
bubble_controller_->SetBubbleMessage(
caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC));
} else {
recording_caller_id_ = caller_id;
requests_[caller_id].is_active = true;
requests_[caller_id].recognizer->StartRecording();
bubble_controller_->SetBubbleWarmUpMode(caller_id);
}
}
void SpeechInputManagerImpl::CancelRecognition(int caller_id) {
DCHECK(HasPendingRequest(caller_id));
if (requests_[caller_id].is_active)
requests_[caller_id].recognizer->CancelRecognition();
requests_.erase(caller_id);
if (recording_caller_id_ == caller_id)
recording_caller_id_ = 0;
bubble_controller_->CloseBubble(caller_id);
}
void SpeechInputManagerImpl::CancelAllRequestsWithDelegate(
SpeechInputManagerDelegate* delegate) {
SpeechRecognizerMap::iterator it = requests_.begin();
while (it != requests_.end()) {
if (it->second.delegate == delegate) {
CancelRecognition(it->first);
// This map will have very few elements so it is simpler to restart.
it = requests_.begin();
} else {
++it;
}
}
}
void SpeechInputManagerImpl::StopRecording(int caller_id) {
DCHECK(HasPendingRequest(caller_id));
requests_[caller_id].recognizer->StopRecording();
}
void SpeechInputManagerImpl::SetRecognitionResult(
int caller_id, bool error, const SpeechInputResultArray& result) {
DCHECK(HasPendingRequest(caller_id));
GetDelegate(caller_id)->SetRecognitionResult(caller_id, result);
}
void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) {
DCHECK(recording_caller_id_ == caller_id);
DCHECK(HasPendingRequest(caller_id));
recording_caller_id_ = 0;
GetDelegate(caller_id)->DidCompleteRecording(caller_id);
bubble_controller_->SetBubbleRecognizingMode(caller_id);
}
void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) {
GetDelegate(caller_id)->DidCompleteRecognition(caller_id);
requests_.erase(caller_id);
bubble_controller_->CloseBubble(caller_id);
}
void SpeechInputManagerImpl::OnRecognizerError(
int caller_id, SpeechRecognizer::ErrorCode error) {
if (caller_id == recording_caller_id_)
recording_caller_id_ = 0;
requests_[caller_id].is_active = false;
struct ErrorMessageMapEntry {
SpeechRecognizer::ErrorCode error;
int message_id;
};
ErrorMessageMapEntry error_message_map[] = {
{
SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE, IDS_SPEECH_INPUT_MIC_ERROR
}, {
SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH, IDS_SPEECH_INPUT_NO_SPEECH
}, {
SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS, IDS_SPEECH_INPUT_NO_RESULTS
}, {
SpeechRecognizer::RECOGNIZER_ERROR_NETWORK, IDS_SPEECH_INPUT_NET_ERROR
}
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(error_message_map); ++i) {
if (error_message_map[i].error == error) {
bubble_controller_->SetBubbleMessage(
caller_id,
l10n_util::GetStringUTF16(error_message_map[i].message_id));
return;
}
}
NOTREACHED() << "unknown error " << error;
}
void SpeechInputManagerImpl::DidStartReceivingAudio(int caller_id) {
DCHECK(HasPendingRequest(caller_id));
DCHECK(recording_caller_id_ == caller_id);
bubble_controller_->SetBubbleRecordingMode(caller_id);
}
void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) {
DCHECK(HasPendingRequest(caller_id));
DCHECK(recording_caller_id_ == caller_id);
}
void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume,
float noise_volume) {
DCHECK(HasPendingRequest(caller_id));
DCHECK_EQ(recording_caller_id_, caller_id);
bubble_controller_->SetBubbleInputVolume(caller_id, volume, noise_volume);
}
void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) {
SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id);
CancelRecognition(caller_id);
cur_delegate->DidCompleteRecording(caller_id);
cur_delegate->DidCompleteRecognition(caller_id);
}
void SpeechInputManagerImpl::InfoBubbleButtonClicked(
int caller_id, SpeechInputBubble::Button button) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
// Ignore if the caller id was not in our active recognizers list because the
// user might have clicked more than once, or recognition could have been
// cancelled due to other reasons before the user click was processed.
if (!HasPendingRequest(caller_id))
return;
if (button == SpeechInputBubble::BUTTON_CANCEL) {
CancelRecognitionAndInformDelegate(caller_id);
} else if (button == SpeechInputBubble::BUTTON_TRY_AGAIN) {
StartRecognitionForRequest(caller_id);
}
}
void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
// Ignore if the caller id was not in our active recognizers list because the
// user might have clicked more than once, or recognition could have been
// ended due to other reasons before the user click was processed.
if (HasPendingRequest(caller_id)) {
// If this is an ongoing recording or if we were displaying an error message
// to the user, abort it since user has switched focus. Otherwise
// recognition has started and keep that going so user can start speaking to
// another element while this gets the results in parallel.
if (recording_caller_id_ == caller_id || !requests_[caller_id].is_active) {
CancelRecognitionAndInformDelegate(caller_id);
}
}
}
} // namespace speech_input