1 /*
2 * Copyright (C) 2012 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "third_party/blink/renderer/modules/speech/speech_recognition.h"
27
28 #include <algorithm>
29 #include "build/build_config.h"
30 #include "mojo/public/cpp/bindings/pending_receiver.h"
31 #include "mojo/public/cpp/bindings/pending_remote.h"
32 #include "third_party/blink/renderer/core/frame/local_dom_window.h"
33 #include "third_party/blink/renderer/core/frame/local_frame.h"
34 #include "third_party/blink/renderer/core/page/page.h"
35 #include "third_party/blink/renderer/modules/speech/speech_recognition_controller.h"
36 #include "third_party/blink/renderer/modules/speech/speech_recognition_error_event.h"
37 #include "third_party/blink/renderer/modules/speech/speech_recognition_event.h"
38 #include "third_party/blink/renderer/platform/bindings/exception_state.h"
39 #include "third_party/blink/renderer/platform/heap/heap.h"
40
41 namespace blink {
42
Create(ExecutionContext * context)43 SpeechRecognition* SpeechRecognition::Create(ExecutionContext* context) {
44 return MakeGarbageCollected<SpeechRecognition>(To<LocalDOMWindow>(context));
45 }
46
start(ExceptionState & exception_state)47 void SpeechRecognition::start(ExceptionState& exception_state) {
48 if (!controller_ || !GetExecutionContext())
49 return;
50
51 if (started_) {
52 exception_state.ThrowDOMException(DOMExceptionCode::kInvalidStateError,
53 "recognition has already started.");
54 return;
55 }
56
57 final_results_.clear();
58
59 mojo::PendingRemote<mojom::blink::SpeechRecognitionSessionClient>
60 session_client;
61 // See https://bit.ly/2S0zRAS for task types.
62 receiver_.Bind(
63 session_client.InitWithNewPipeAndPassReceiver(),
64 GetExecutionContext()->GetTaskRunner(TaskType::kMiscPlatformAPI));
65 receiver_.set_disconnect_handler(WTF::Bind(
66 &SpeechRecognition::OnConnectionError, WrapWeakPersistent(this)));
67
68 controller_->Start(
69 session_.BindNewPipeAndPassReceiver(
70 GetExecutionContext()->GetTaskRunner(TaskType::kMiscPlatformAPI)),
71 std::move(session_client), *grammars_, lang_, continuous_,
72 interim_results_, max_alternatives_);
73 started_ = true;
74 }
75
stopFunction()76 void SpeechRecognition::stopFunction() {
77 if (!controller_)
78 return;
79
80 if (started_ && !stopping_) {
81 stopping_ = true;
82 session_->StopCapture();
83 }
84 }
85
abort()86 void SpeechRecognition::abort() {
87 if (!controller_)
88 return;
89
90 if (started_ && !stopping_) {
91 stopping_ = true;
92 session_->Abort();
93 }
94 }
95
ResultRetrieved(WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results)96 void SpeechRecognition::ResultRetrieved(
97 WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results) {
98 auto* it = std::stable_partition(
99 results.begin(), results.end(),
100 [](const auto& result) { return !result->is_provisional; });
101 wtf_size_t provisional_count = static_cast<wtf_size_t>(results.end() - it);
102
103 // Add the new results to the previous final results.
104 HeapVector<Member<SpeechRecognitionResult>> aggregated_results =
105 std::move(final_results_);
106 aggregated_results.ReserveCapacity(aggregated_results.size() +
107 results.size());
108
109 for (const auto& result : results) {
110 HeapVector<Member<SpeechRecognitionAlternative>> alternatives;
111 alternatives.ReserveInitialCapacity(result->hypotheses.size());
112 for (const auto& hypothesis : result->hypotheses) {
113 alternatives.push_back(MakeGarbageCollected<SpeechRecognitionAlternative>(
114 hypothesis->utterance, hypothesis->confidence));
115 }
116 aggregated_results.push_back(SpeechRecognitionResult::Create(
117 std::move(alternatives), !result->is_provisional));
118 }
119
120 // |aggregated_results| now contains the following (in the given order):
121 //
122 // (1) previous final results from |final_results_|
123 // (2) new final results from |results|
124 // (3) new provisional results from |results|
125
126 // |final_results_| = (1) + (2).
127 HeapVector<Member<SpeechRecognitionResult>> new_final_results;
128 new_final_results.ReserveInitialCapacity(aggregated_results.size() -
129 provisional_count);
130 new_final_results.AppendRange(aggregated_results.begin(),
131 aggregated_results.end() - provisional_count);
132 final_results_ = std::move(new_final_results);
133
134 // We dispatch an event with (1) + (2) + (3).
135 DispatchEvent(*SpeechRecognitionEvent::CreateResult(
136 aggregated_results.size() - results.size(),
137 std::move(aggregated_results)));
138 }
139
ErrorOccurred(mojom::blink::SpeechRecognitionErrorPtr error)140 void SpeechRecognition::ErrorOccurred(
141 mojom::blink::SpeechRecognitionErrorPtr error) {
142 if (error->code == mojom::blink::SpeechRecognitionErrorCode::kNoMatch) {
143 DispatchEvent(*SpeechRecognitionEvent::CreateNoMatch(nullptr));
144 } else {
145 // TODO(primiano): message?
146 DispatchEvent(*SpeechRecognitionErrorEvent::Create(error->code, String()));
147 }
148 }
149
Started()150 void SpeechRecognition::Started() {
151 DispatchEvent(*Event::Create(event_type_names::kStart));
152 }
153
AudioStarted()154 void SpeechRecognition::AudioStarted() {
155 DispatchEvent(*Event::Create(event_type_names::kAudiostart));
156 }
157
SoundStarted()158 void SpeechRecognition::SoundStarted() {
159 DispatchEvent(*Event::Create(event_type_names::kSoundstart));
160 DispatchEvent(*Event::Create(event_type_names::kSpeechstart));
161 }
162
SoundEnded()163 void SpeechRecognition::SoundEnded() {
164 DispatchEvent(*Event::Create(event_type_names::kSpeechend));
165 DispatchEvent(*Event::Create(event_type_names::kSoundend));
166 }
167
AudioEnded()168 void SpeechRecognition::AudioEnded() {
169 DispatchEvent(*Event::Create(event_type_names::kAudioend));
170 }
171
Ended()172 void SpeechRecognition::Ended() {
173 started_ = false;
174 stopping_ = false;
175 session_.reset();
176 receiver_.reset();
177 DispatchEvent(*Event::Create(event_type_names::kEnd));
178 }
179
InterfaceName() const180 const AtomicString& SpeechRecognition::InterfaceName() const {
181 return event_target_names::kSpeechRecognition;
182 }
183
GetExecutionContext() const184 ExecutionContext* SpeechRecognition::GetExecutionContext() const {
185 return ExecutionContextLifecycleObserver::GetExecutionContext();
186 }
187
ContextDestroyed()188 void SpeechRecognition::ContextDestroyed() {
189 controller_ = nullptr;
190 }
191
HasPendingActivity() const192 bool SpeechRecognition::HasPendingActivity() const {
193 return started_;
194 }
195
PageVisibilityChanged()196 void SpeechRecognition::PageVisibilityChanged() {
197 #if defined(OS_ANDROID)
198 if (!GetPage()->IsPageVisible())
199 abort();
200 #endif
201 }
202
OnConnectionError()203 void SpeechRecognition::OnConnectionError() {
204 ErrorOccurred(mojom::blink::SpeechRecognitionError::New(
205 mojom::blink::SpeechRecognitionErrorCode::kNetwork,
206 mojom::blink::SpeechAudioErrorDetails::kNone));
207 Ended();
208 }
209
// Constructs a SpeechRecognition attached to |window|.
//
// The object observes |window| as its execution context and, when the window
// has a frame, that frame's page for visibility changes (otherwise no page is
// observed). It starts out with an empty grammar list, non-continuous mode,
// interim results disabled, a single alternative per result, and neither
// started nor stopping. The controller is looked up from |window|, and both
// mojo endpoints are created unbound; start() binds them.
SpeechRecognition::SpeechRecognition(LocalDOMWindow* window)
    : ExecutionContextLifecycleObserver(window),
      PageVisibilityObserver(window->GetFrame() ? window->GetFrame()->GetPage()
                                                : nullptr),
      grammars_(SpeechGrammarList::Create()),  // FIXME: The spec is not clear
                                               // on the default value for the
                                               // grammars attribute.
      continuous_(false),
      interim_results_(false),
      max_alternatives_(1),
      controller_(SpeechRecognitionController::From(*window)),
      started_(false),
      stopping_(false),
      receiver_(this, window),
      session_(window) {}
225
// Defaulted destructor: no explicit teardown is performed here.
SpeechRecognition::~SpeechRecognition() = default;
227
// Garbage-collection tracing hook: reports the traced members of this object
// to |visitor|, then forwards to each base class's Trace().
void SpeechRecognition::Trace(Visitor* visitor) const {
  // Members owned by this class.
  visitor->Trace(grammars_);
  visitor->Trace(controller_);
  visitor->Trace(final_results_);
  visitor->Trace(receiver_);
  visitor->Trace(session_);
  // Base classes.
  EventTargetWithInlineData::Trace(visitor);
  ExecutionContextLifecycleObserver::Trace(visitor);
  PageVisibilityObserver::Trace(visitor);
}
238
239 } // namespace blink
240