1 // Copyright (c) 2018 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CONTENT_PUBLIC_BROWSER_TTS_CONTROLLER_H_ 6 #define CONTENT_PUBLIC_BROWSER_TTS_CONTROLLER_H_ 7 8 #include <memory> 9 #include <queue> 10 #include <set> 11 #include <string> 12 #include <vector> 13 14 #include "base/memory/singleton.h" 15 #include "base/memory/weak_ptr.h" 16 #include "base/observer_list_types.h" 17 #include "content/common/content_export.h" 18 #include "content/public/browser/tts_utterance.h" 19 #include "url/gurl.h" 20 21 namespace content { 22 class BrowserContext; 23 class TtsPlatform; 24 25 // Information about one voice. 26 struct CONTENT_EXPORT VoiceData { 27 VoiceData(); 28 VoiceData(const VoiceData& other); 29 ~VoiceData(); 30 31 std::string name; 32 std::string lang; 33 std::string engine_id; 34 std::set<TtsEventType> events; 35 36 // If true, the synthesis engine is a remote network resource. 37 // It may be higher latency and may incur bandwidth costs. 38 bool remote; 39 40 // If true, this is implemented by this platform's subclass of 41 // TtsPlatformImpl. If false, this is implemented in a content embedder. 42 bool native; 43 std::string native_voice_identifier; 44 }; 45 46 // Interface that delegates TTS requests to engines in content embedders. 47 class CONTENT_EXPORT TtsEngineDelegate { 48 public: ~TtsEngineDelegate()49 virtual ~TtsEngineDelegate() {} 50 51 // Return a list of all available voices registered. 52 virtual void GetVoices(BrowserContext* browser_context, 53 std::vector<VoiceData>* out_voices) = 0; 54 55 // Speak the given utterance by sending an event to the given TTS engine. 56 virtual void Speak(TtsUtterance* utterance, const VoiceData& voice) = 0; 57 58 // Stop speaking the given utterance by sending an event to the target 59 // associated with this utterance. 60 virtual void Stop(TtsUtterance* utterance) = 0; 61 62 // Pause in the middle of speaking this utterance. 63 virtual void Pause(TtsUtterance* utterance) = 0; 64 65 // Resume speaking this utterance. 66 virtual void Resume(TtsUtterance* utterance) = 0; 67 68 // Load the built-in TTS engine. 69 virtual bool LoadBuiltInTtsEngine(BrowserContext* browser_context) = 0; 70 }; 71 72 // Class that wants to be notified when the set of 73 // voices has changed. 74 class CONTENT_EXPORT VoicesChangedDelegate : public base::CheckedObserver { 75 public: 76 virtual void OnVoicesChanged() = 0; 77 }; 78 79 // Singleton class that manages text-to-speech for all TTS engines and 80 // APIs, maintaining a queue of pending utterances and keeping 81 // track of all state. 82 class CONTENT_EXPORT TtsController { 83 public: 84 // Get the single instance of this class. 85 static TtsController* GetInstance(); 86 87 // Returns true if we're currently speaking an utterance. 88 virtual bool IsSpeaking() = 0; 89 90 // Speak the given utterance. If the utterance's can_enqueue flag is true 91 // and another utterance is in progress, adds it to the end of the queue. 92 // Otherwise, interrupts any current utterance and speaks this one 93 // immediately. 94 virtual void SpeakOrEnqueue(std::unique_ptr<TtsUtterance> utterance) = 0; 95 96 // Stop all utterances and flush the queue. Implies leaving pause mode 97 // as well. 98 virtual void Stop() = 0; 99 100 // Stops the current utterance if it matches the given |source_url|. 101 virtual void Stop(const GURL& source_url) = 0; 102 103 // Pause the speech queue. Some engines may support pausing in the middle 104 // of an utterance. 105 virtual void Pause() = 0; 106 107 // Resume speaking. 108 virtual void Resume() = 0; 109 110 // Handle events received from the speech engine. Events are forwarded to 111 // the callback function, and in addition, completion and error events 112 // trigger finishing the current utterance and starting the next one, if 113 // any. If the |char_index| or |length| are not available, the speech engine 114 // should pass -1. 115 virtual void OnTtsEvent(int utterance_id, 116 TtsEventType event_type, 117 int char_index, 118 int length, 119 const std::string& error_message) = 0; 120 121 // Return a list of all available voices, including the native voice, 122 // if supported, and all voices registered by engines. 123 virtual void GetVoices(BrowserContext* browser_context, 124 std::vector<VoiceData>* out_voices) = 0; 125 126 // Called by the content embedder or platform implementation when the 127 // list of voices may have changed and should be re-queried. 128 virtual void VoicesChanged() = 0; 129 130 // Add a delegate that wants to be notified when the set of voices changes. 131 virtual void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0; 132 133 // Remove delegate that wants to be notified when the set of voices changes. 134 virtual void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0; 135 136 // Remove delegate that wants to be notified when an utterance fires an event. 137 // Note: this cancels speech from any utterance with this delegate, and 138 // removes any utterances with this delegate from the queue. 139 virtual void RemoveUtteranceEventDelegate( 140 UtteranceEventDelegate* delegate) = 0; 141 142 // Set the delegate that processes TTS requests with engines in a content 143 // embedder. 144 virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) = 0; 145 146 // Get the delegate that processes TTS requests with engines in a content 147 // embedder. 148 virtual TtsEngineDelegate* GetTtsEngineDelegate() = 0; 149 150 // Visible for testing. 151 virtual void SetTtsPlatform(TtsPlatform* tts_platform) = 0; 152 virtual int QueueSize() = 0; 153 154 virtual void StripSSML( 155 const std::string& utterance, 156 base::OnceCallback<void(const std::string&)> callback) = 0; 157 158 protected: ~TtsController()159 virtual ~TtsController() {} 160 }; 161 162 } // namespace content 163 164 #endif // CONTENT_PUBLIC_BROWSER_TTS_CONTROLLER_H_ 165