1 /*
2  * Copyright (C) 2012 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *  * Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "third_party/blink/renderer/modules/speech/speech_recognition.h"
27 
28 #include <algorithm>
29 #include "build/build_config.h"
30 #include "mojo/public/cpp/bindings/pending_receiver.h"
31 #include "mojo/public/cpp/bindings/pending_remote.h"
32 #include "third_party/blink/renderer/core/frame/local_dom_window.h"
33 #include "third_party/blink/renderer/core/frame/local_frame.h"
34 #include "third_party/blink/renderer/core/page/page.h"
35 #include "third_party/blink/renderer/modules/speech/speech_recognition_controller.h"
36 #include "third_party/blink/renderer/modules/speech/speech_recognition_error_event.h"
37 #include "third_party/blink/renderer/modules/speech/speech_recognition_event.h"
38 #include "third_party/blink/renderer/platform/bindings/exception_state.h"
39 #include "third_party/blink/renderer/platform/heap/heap.h"
40 
41 namespace blink {
42 
Create(ExecutionContext * context)43 SpeechRecognition* SpeechRecognition::Create(ExecutionContext* context) {
44   return MakeGarbageCollected<SpeechRecognition>(To<LocalDOMWindow>(context));
45 }
46 
start(ExceptionState & exception_state)47 void SpeechRecognition::start(ExceptionState& exception_state) {
48   if (!controller_ || !GetExecutionContext())
49     return;
50 
51   if (started_) {
52     exception_state.ThrowDOMException(DOMExceptionCode::kInvalidStateError,
53                                       "recognition has already started.");
54     return;
55   }
56 
57   final_results_.clear();
58 
59   mojo::PendingRemote<mojom::blink::SpeechRecognitionSessionClient>
60       session_client;
61   // See https://bit.ly/2S0zRAS for task types.
62   receiver_.Bind(
63       session_client.InitWithNewPipeAndPassReceiver(),
64       GetExecutionContext()->GetTaskRunner(TaskType::kMiscPlatformAPI));
65   receiver_.set_disconnect_handler(WTF::Bind(
66       &SpeechRecognition::OnConnectionError, WrapWeakPersistent(this)));
67 
68   controller_->Start(
69       session_.BindNewPipeAndPassReceiver(
70           GetExecutionContext()->GetTaskRunner(TaskType::kMiscPlatformAPI)),
71       std::move(session_client), *grammars_, lang_, continuous_,
72       interim_results_, max_alternatives_);
73   started_ = true;
74 }
75 
stopFunction()76 void SpeechRecognition::stopFunction() {
77   if (!controller_)
78     return;
79 
80   if (started_ && !stopping_) {
81     stopping_ = true;
82     session_->StopCapture();
83   }
84 }
85 
abort()86 void SpeechRecognition::abort() {
87   if (!controller_)
88     return;
89 
90   if (started_ && !stopping_) {
91     stopping_ = true;
92     session_->Abort();
93   }
94 }
95 
ResultRetrieved(WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results)96 void SpeechRecognition::ResultRetrieved(
97     WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results) {
98   auto* it = std::stable_partition(
99       results.begin(), results.end(),
100       [](const auto& result) { return !result->is_provisional; });
101   wtf_size_t provisional_count = static_cast<wtf_size_t>(results.end() - it);
102 
103   // Add the new results to the previous final results.
104   HeapVector<Member<SpeechRecognitionResult>> aggregated_results =
105       std::move(final_results_);
106   aggregated_results.ReserveCapacity(aggregated_results.size() +
107                                      results.size());
108 
109   for (const auto& result : results) {
110     HeapVector<Member<SpeechRecognitionAlternative>> alternatives;
111     alternatives.ReserveInitialCapacity(result->hypotheses.size());
112     for (const auto& hypothesis : result->hypotheses) {
113       alternatives.push_back(MakeGarbageCollected<SpeechRecognitionAlternative>(
114           hypothesis->utterance, hypothesis->confidence));
115     }
116     aggregated_results.push_back(SpeechRecognitionResult::Create(
117         std::move(alternatives), !result->is_provisional));
118   }
119 
120   // |aggregated_results| now contains the following (in the given order):
121   //
122   // (1) previous final results from |final_results_|
123   // (2) new final results from |results|
124   // (3) new provisional results from |results|
125 
126   // |final_results_| = (1) + (2).
127   HeapVector<Member<SpeechRecognitionResult>> new_final_results;
128   new_final_results.ReserveInitialCapacity(aggregated_results.size() -
129                                            provisional_count);
130   new_final_results.AppendRange(aggregated_results.begin(),
131                                 aggregated_results.end() - provisional_count);
132   final_results_ = std::move(new_final_results);
133 
134   // We dispatch an event with (1) + (2) + (3).
135   DispatchEvent(*SpeechRecognitionEvent::CreateResult(
136       aggregated_results.size() - results.size(),
137       std::move(aggregated_results)));
138 }
139 
ErrorOccurred(mojom::blink::SpeechRecognitionErrorPtr error)140 void SpeechRecognition::ErrorOccurred(
141     mojom::blink::SpeechRecognitionErrorPtr error) {
142   if (error->code == mojom::blink::SpeechRecognitionErrorCode::kNoMatch) {
143     DispatchEvent(*SpeechRecognitionEvent::CreateNoMatch(nullptr));
144   } else {
145     // TODO(primiano): message?
146     DispatchEvent(*SpeechRecognitionErrorEvent::Create(error->code, String()));
147   }
148 }
149 
Started()150 void SpeechRecognition::Started() {
151   DispatchEvent(*Event::Create(event_type_names::kStart));
152 }
153 
AudioStarted()154 void SpeechRecognition::AudioStarted() {
155   DispatchEvent(*Event::Create(event_type_names::kAudiostart));
156 }
157 
SoundStarted()158 void SpeechRecognition::SoundStarted() {
159   DispatchEvent(*Event::Create(event_type_names::kSoundstart));
160   DispatchEvent(*Event::Create(event_type_names::kSpeechstart));
161 }
162 
SoundEnded()163 void SpeechRecognition::SoundEnded() {
164   DispatchEvent(*Event::Create(event_type_names::kSpeechend));
165   DispatchEvent(*Event::Create(event_type_names::kSoundend));
166 }
167 
AudioEnded()168 void SpeechRecognition::AudioEnded() {
169   DispatchEvent(*Event::Create(event_type_names::kAudioend));
170 }
171 
Ended()172 void SpeechRecognition::Ended() {
173   started_ = false;
174   stopping_ = false;
175   session_.reset();
176   receiver_.reset();
177   DispatchEvent(*Event::Create(event_type_names::kEnd));
178 }
179 
InterfaceName() const180 const AtomicString& SpeechRecognition::InterfaceName() const {
181   return event_target_names::kSpeechRecognition;
182 }
183 
GetExecutionContext() const184 ExecutionContext* SpeechRecognition::GetExecutionContext() const {
185   return ExecutionContextLifecycleObserver::GetExecutionContext();
186 }
187 
ContextDestroyed()188 void SpeechRecognition::ContextDestroyed() {
189   controller_ = nullptr;
190 }
191 
HasPendingActivity() const192 bool SpeechRecognition::HasPendingActivity() const {
193   return started_;
194 }
195 
PageVisibilityChanged()196 void SpeechRecognition::PageVisibilityChanged() {
197 #if defined(OS_ANDROID)
198   if (!GetPage()->IsPageVisible())
199     abort();
200 #endif
201 }
202 
OnConnectionError()203 void SpeechRecognition::OnConnectionError() {
204   ErrorOccurred(mojom::blink::SpeechRecognitionError::New(
205       mojom::blink::SpeechRecognitionErrorCode::kNetwork,
206       mojom::blink::SpeechAudioErrorDetails::kNone));
207   Ended();
208 }
209 
SpeechRecognition(LocalDOMWindow * window)210 SpeechRecognition::SpeechRecognition(LocalDOMWindow* window)
211     : ExecutionContextLifecycleObserver(window),
212       PageVisibilityObserver(window->GetFrame() ? window->GetFrame()->GetPage()
213                                                 : nullptr),
214       grammars_(SpeechGrammarList::Create()),  // FIXME: The spec is not clear
215                                                // on the default value for the
216                                                // grammars attribute.
217       continuous_(false),
218       interim_results_(false),
219       max_alternatives_(1),
220       controller_(SpeechRecognitionController::From(*window)),
221       started_(false),
222       stopping_(false),
223       receiver_(this, window),
224       session_(window) {}
225 
226 SpeechRecognition::~SpeechRecognition() = default;
227 
Trace(Visitor * visitor) const228 void SpeechRecognition::Trace(Visitor* visitor) const {
229   visitor->Trace(grammars_);
230   visitor->Trace(controller_);
231   visitor->Trace(final_results_);
232   visitor->Trace(receiver_);
233   visitor->Trace(session_);
234   EventTargetWithInlineData::Trace(visitor);
235   ExecutionContextLifecycleObserver::Trace(visitor);
236   PageVisibilityObserver::Trace(visitor);
237 }
238 
239 }  // namespace blink
240