// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
#define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
#include <queue>
#include <set>
#include <string>
#include <vector>
#include "base/memory/scoped_ptr.h"
#include "base/memory/singleton.h"
#include "base/memory/weak_ptr.h"
#include "url/gurl.h"
class Utterance;
class TtsPlatformImpl;
class Profile;
namespace base {
class Value;
}
// Progress events that the TTS engine reports back for an utterance.
// The numeric values are spelled out for readability; they are exactly the
// values the compiler would assign implicitly (0-based, declaration order).
enum TtsEventType {
  TTS_EVENT_START = 0,
  TTS_EVENT_END = 1,
  TTS_EVENT_WORD = 2,
  TTS_EVENT_SENTENCE = 3,
  TTS_EVENT_MARKER = 4,
  TTS_EVENT_INTERRUPTED = 5,
  TTS_EVENT_CANCELLED = 6,
  TTS_EVENT_ERROR = 7,
  TTS_EVENT_PAUSE = 8,
  TTS_EVENT_RESUME = 9
};
// Gender attribute used both to describe a voice (VoiceData::gender) and to
// express the gender requested for an utterance (Utterance::gender()).
// The numeric values are spelled out; they equal the implicit 0-based
// numbering.
enum TtsGenderType {
  TTS_GENDER_NONE = 0,
  TTS_GENDER_MALE = 1,
  TTS_GENDER_FEMALE = 2
};
// Returns true if this event type is one that indicates an utterance
// is finished and can be destroyed.
// Only the declaration lives in this header; which event types count as
// final is decided by the out-of-line definition, which is not visible here.
bool IsFinalTtsEventType(TtsEventType event_type);
// The continuous parameters that apply to a given utterance.
// An instance is stored on each Utterance through
// Utterance::set_continuous_parameters().
struct UtteranceContinuousParameters {
// Defined out of line. Presumably gives the three fields their default
// values - TODO confirm in the implementation file.
UtteranceContinuousParameters();
// Speaking rate. NOTE(review): units and valid range are not visible in
// this header.
double rate;
// Voice pitch. NOTE(review): units and valid range are not visible in
// this header.
double pitch;
// Output volume. NOTE(review): units and valid range are not visible in
// this header.
double volume;
};
// Information about one voice.
// Instances are returned in bulk by TtsController::GetVoices().
struct VoiceData {
// Constructor and destructor are defined out of line.
VoiceData();
~VoiceData();
// Name of the voice.
std::string name;
// Language of the voice. NOTE(review): the expected format (e.g. a
// language tag) is not visible here - confirm.
std::string lang;
// Gender of the voice.
TtsGenderType gender;
// Presumably the ID of the extension that implements this voice when
// |native| is false - TODO confirm.
std::string extension_id;
// Event types associated with this voice. Presumably the events its
// engine is able to send - TODO confirm.
std::set<TtsEventType> events;
// If true, the synthesis engine is a remote network resource.
// It may be higher latency and may incur bandwidth costs.
bool remote;
// If true, this is implemented by this platform's subclass of
// TtsPlatformImpl. If false, this is implemented by an extension.
bool native;
// Presumably a platform-specific identifier for the voice, meaningful only
// when |native| is true - TODO confirm.
std::string native_voice_identifier;
};
// Class that wants to receive events on utterances.
// An Utterance refers to its delegate through a base::WeakPtr (see
// Utterance::set_event_delegate()).
class UtteranceEventDelegate {
public:
// Virtual so that implementations can be destroyed through this interface.
virtual ~UtteranceEventDelegate() {}
// Called with an event for |utterance|.
//   utterance:     the utterance the event belongs to.
//   event_type:    which kind of event occurred.
//   char_index:    character position associated with the event.
//                  NOTE(review): presumably an index into the utterance
//                  text - confirm.
//   error_message: accompanying message. Presumably only meaningful for
//                  TTS_EVENT_ERROR - confirm.
virtual void OnTtsEvent(Utterance* utterance,
TtsEventType event_type,
int char_index,
const std::string& error_message) = 0;
};
// Class that wants to be notified when the set of
// voices has changed.
// Delegates are registered and unregistered with
// TtsController::AddVoicesChangedDelegate() and
// TtsController::RemoveVoicesChangedDelegate().
class VoicesChangedDelegate {
public:
// Virtual so that implementations can be destroyed through this interface.
virtual ~VoicesChangedDelegate() {}
// Notification that the set of voices has changed. It carries no payload;
// the current list is available from TtsController::GetVoices().
virtual void OnVoicesChanged() = 0;
};
// One speech utterance: the text to speak together with the speech options,
// the identity of the caller that requested it, and its progress state.
class Utterance {
 public:
  // Construct an utterance for the given profile. Before speaking this
  // utterance, its other parameters like text, rate, pitch, etc. should all
  // be set through the setters below.
  explicit Utterance(Profile* profile);
  ~Utterance();

  // Sends an event to the delegate. If |char_index| is -1, uses the last
  // good value.
  // NOTE(review): the previous comment stated that the utterance is deleted
  // when the event type is TTS_EVENT_END or TTS_EVENT_ERROR. The
  // implementation is not visible in this header; confirm how final events
  // (see IsFinalTtsEventType() and finished()) affect the object's lifetime
  // before relying on the pointer after this call.
  void OnTtsEvent(TtsEventType event_type,
                  int char_index,
                  const std::string& error_message);

  // Finish an utterance without sending an event to the delegate.
  void Finish();

  // Getters and setters for the text to speak and other speech options.
  // All getters are const so they can be used through a const Utterance*.
  void set_text(const std::string& text) { text_ = text; }
  const std::string& text() const { return text_; }

  // Defined out of line.
  // NOTE(review): whether |options| is copied or adopted is not visible
  // here - confirm before passing a pointer you still own.
  void set_options(const base::Value* options);
  const base::Value* options() const { return options_.get(); }

  void set_src_extension_id(const std::string& src_extension_id) {
    src_extension_id_ = src_extension_id;
  }
  const std::string& src_extension_id() const { return src_extension_id_; }

  void set_src_id(int src_id) { src_id_ = src_id; }
  int src_id() const { return src_id_; }

  void set_src_url(const GURL& src_url) { src_url_ = src_url; }
  const GURL& src_url() const { return src_url_; }

  void set_voice_name(const std::string& voice_name) {
    voice_name_ = voice_name;
  }
  const std::string& voice_name() const { return voice_name_; }

  void set_lang(const std::string& lang) {
    lang_ = lang;
  }
  const std::string& lang() const { return lang_; }

  void set_gender(TtsGenderType gender) {
    gender_ = gender;
  }
  TtsGenderType gender() const { return gender_; }

  void set_continuous_parameters(const UtteranceContinuousParameters& params) {
    continuous_parameters_ = params;
  }
  const UtteranceContinuousParameters& continuous_parameters() const {
    return continuous_parameters_;
  }

  void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
  bool can_enqueue() const { return can_enqueue_; }

  void set_required_event_types(const std::set<TtsEventType>& types) {
    required_event_types_ = types;
  }
  const std::set<TtsEventType>& required_event_types() const {
    return required_event_types_;
  }

  void set_desired_event_types(const std::set<TtsEventType>& types) {
    desired_event_types_ = types;
  }
  const std::set<TtsEventType>& desired_event_types() const {
    return desired_event_types_;
  }

  const std::string& extension_id() const { return extension_id_; }
  void set_extension_id(const std::string& extension_id) {
    extension_id_ = extension_id;
  }

  // Returns the delegate as a raw pointer obtained from the stored WeakPtr.
  // Callers should be prepared for a null result, since a WeakPtr yields
  // null once its target has gone away.
  UtteranceEventDelegate* event_delegate() const {
    return event_delegate_.get();
  }
  void set_event_delegate(
      base::WeakPtr<UtteranceEventDelegate> event_delegate) {
    event_delegate_ = event_delegate;
  }

  // Getters and setters for internal state.
  Profile* profile() const { return profile_; }
  int id() const { return id_; }
  bool finished() const { return finished_; }

 private:
  // The profile that initiated this utterance.
  Profile* profile_;

  // The extension ID of the extension providing TTS for this utterance, or
  // empty if native TTS is being used.
  std::string extension_id_;

  // The unique ID of this utterance, used to associate callback functions
  // with utterances.
  int id_;

  // The id of the next utterance, so we can associate requests with
  // responses.
  static int next_utterance_id_;

  // The text to speak.
  std::string text_;

  // The full options arg passed to tts.speak, which may include fields
  // other than the ones we explicitly parse, below.
  scoped_ptr<base::Value> options_;

  // The extension ID of the extension that called speak() and should
  // receive events.
  std::string src_extension_id_;

  // The source extension's ID of this utterance, so that it can associate
  // events with the appropriate callback.
  int src_id_;

  // The URL of the page where the source extension called speak.
  GURL src_url_;

  // The delegate to be called when an utterance event is fired.
  base::WeakPtr<UtteranceEventDelegate> event_delegate_;

  // The parsed options.
  std::string voice_name_;
  std::string lang_;
  TtsGenderType gender_;
  UtteranceContinuousParameters continuous_parameters_;
  bool can_enqueue_;
  std::set<TtsEventType> required_event_types_;
  std::set<TtsEventType> desired_event_types_;

  // The index of the current char being spoken.
  int char_index_;

  // True if this utterance received an event indicating it's done.
  bool finished_;
};
// Singleton class that manages text-to-speech for the TTS and TTS engine
// extension APIs, maintaining a queue of pending utterances and keeping
// track of all state.
class TtsController {
public:
// Get the single instance of this class.
static TtsController* GetInstance();
// Returns true if we're currently speaking an utterance.
bool IsSpeaking();
// Speak the given utterance. If the utterance's can_enqueue flag is true
// and another utterance is in progress, adds it to the end of the queue.
// Otherwise, interrupts any current utterance and speaks this one
// immediately.
// NOTE(review): |utterance| is a raw pointer; ownership presumably passes
// to the controller (SpeakNow() below is documented as taking ownership) -
// confirm in the implementation.
void SpeakOrEnqueue(Utterance* utterance);
// Stop all utterances and flush the queue. Implies leaving pause mode
// as well.
void Stop();
// Pause the speech queue. Some engines may support pausing in the middle
// of an utterance.
void Pause();
// Resume speaking.
void Resume();
// Handle events received from the speech engine. Events are forwarded to
// the callback function, and in addition, completion and error events
// trigger finishing the current utterance and starting the next one, if
// any.
// |utterance_id| identifies the utterance the event is for (see
// Utterance::id()).
void OnTtsEvent(int utterance_id,
TtsEventType event_type,
int char_index,
const std::string& error_message);
// Return a list of all available voices, including the native voice,
// if supported, and all voices registered by extensions.
// The result is delivered through the |out_voices| output parameter.
void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices);
// Called by TtsExtensionLoaderChromeOs::LoadTtsExtension when it
// finishes loading the built-in TTS component extension.
void RetrySpeakingQueuedUtterances();
// Called by the extension system or platform implementation when the
// list of voices may have changed and should be re-queried.
void VoicesChanged();
// Add a delegate that wants to be notified when the set of voices changes.
// The delegate is held as a raw pointer (see |voices_changed_delegates_|),
// so it should be removed before it is destroyed.
void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate);
// Remove delegate that wants to be notified when the set of voices changes.
void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate);
// For unit testing.
// SetPlatformImpl() injects the platform implementation returned by
// GetPlatformImpl(); QueueSize() presumably reports the number of queued
// utterances - confirm.
void SetPlatformImpl(TtsPlatformImpl* platform_impl);
int QueueSize();
protected:
// Construction and destruction are restricted; instances are obtained via
// GetInstance().
TtsController();
virtual ~TtsController();
private:
// Get the platform TTS implementation (or injected mock).
TtsPlatformImpl* GetPlatformImpl();
// Start speaking the given utterance. Will either take ownership of
// |utterance| or delete it if there's an error. This function returns
// nothing, so success or failure is not reported to the caller through a
// return value.
void SpeakNow(Utterance* utterance);
// Clear the utterance queue. If send_events is true, will send
// TTS_EVENT_CANCELLED events on each one.
void ClearUtteranceQueue(bool send_events);
// Finalize and delete the current utterance.
void FinishCurrentUtterance();
// Start speaking the next utterance in the queue.
void SpeakNextUtterance();
// Given an utterance and a vector of voices, return the
// index of the voice that best matches the utterance.
// NOTE(review): the value returned when nothing matches is not visible in
// this header - confirm.
int GetMatchingVoice(const Utterance* utterance,
std::vector<VoiceData>& voices);
// Gives the singleton traits from base/memory/singleton.h access to the
// protected constructor and destructor.
friend struct DefaultSingletonTraits<TtsController>;
// The current utterance being spoken.
Utterance* current_utterance_;
// Whether the queue is paused or not.
bool paused_;
// A queue of utterances to speak after the current one finishes.
std::queue<Utterance*> utterance_queue_;
// A set of delegates that want to be notified when the voices change.
std::set<VoicesChangedDelegate*> voices_changed_delegates_;
// A pointer to the platform implementation of text-to-speech, for
// dependency injection.
TtsPlatformImpl* platform_impl_;
DISALLOW_COPY_AND_ASSIGN(TtsController);
};
#endif // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_