1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "services/shape_detection/text_detection_impl_win.h"
6
7 #include <windows.foundation.collections.h>
8 #include <windows.globalization.h>
9 #include <memory>
10 #include <string>
11
12 #include "base/bind.h"
13 #include "base/logging.h"
14 #include "base/win/core_winrt_util.h"
15 #include "base/win/post_async_results.h"
16 #include "base/win/scoped_hstring.h"
17 #include "base/win/windows_version.h"
18 #include "mojo/public/cpp/bindings/pending_receiver.h"
19 #include "mojo/public/cpp/bindings/self_owned_receiver.h"
20 #include "services/shape_detection/detection_utils_win.h"
21 #include "services/shape_detection/text_detection_impl.h"
22 #include "ui/gfx/geometry/rect_f.h"
23
24 namespace shape_detection {
25
26 using ABI::Windows::Foundation::IAsyncOperation;
27 using ABI::Windows::Foundation::Collections::IVectorView;
28 using ABI::Windows::Globalization::ILanguageFactory;
29 using ABI::Windows::Graphics::Imaging::ISoftwareBitmap;
30 using ABI::Windows::Graphics::Imaging::ISoftwareBitmapStatics;
31 using ABI::Windows::Media::Ocr::IOcrEngine;
32 using ABI::Windows::Media::Ocr::IOcrEngineStatics;
33 using ABI::Windows::Media::Ocr::IOcrLine;
34 using ABI::Windows::Media::Ocr::IOcrResult;
35 using ABI::Windows::Media::Ocr::IOcrWord;
36 using ABI::Windows::Media::Ocr::OcrLine;
37 using ABI::Windows::Media::Ocr::OcrResult;
38 using ABI::Windows::Media::Ocr::OcrWord;
39 using base::win::GetActivationFactory;
40 using base::win::ScopedHString;
41 using Microsoft::WRL::ComPtr;
42
43 // static
Create(mojo::PendingReceiver<mojom::TextDetection> receiver)44 void TextDetectionImpl::Create(
45 mojo::PendingReceiver<mojom::TextDetection> receiver) {
46 // OcrEngine class is only available in Win 10 onwards (v10.0.10240.0) that
47 // documents in
48 // https://docs.microsoft.com/en-us/uwp/api/windows.media.ocr.ocrengine.
49 if (base::win::GetVersion() < base::win::Version::WIN10) {
50 DVLOG(1) << "Optical character recognition not supported before Windows 10";
51 return;
52 }
53 DCHECK_GE(base::win::OSInfo::GetInstance()->version_number().build, 10240);
54
55 // Loads functions dynamically at runtime to prevent library dependencies.
56 if (!(base::win::ResolveCoreWinRTDelayload() &&
57 ScopedHString::ResolveCoreWinRTStringDelayload())) {
58 DLOG(ERROR) << "Failed loading functions from combase.dll";
59 return;
60 }
61
62 // Text Detection specification only supports Latin-1 text as documented in
63 // https://wicg.github.io/shape-detection-api/text.html#text-detection-api.
64 // TODO(junwei.fu): https://crbug.com/794097 consider supporting other Latin
65 // script language.
66 ScopedHString language_hstring = ScopedHString::Create("en");
67 if (!language_hstring.is_valid())
68 return;
69
70 ComPtr<ILanguageFactory> language_factory;
71 HRESULT hr =
72 GetActivationFactory<ILanguageFactory,
73 RuntimeClass_Windows_Globalization_Language>(
74 &language_factory);
75 if (FAILED(hr)) {
76 DLOG(ERROR) << "ILanguage factory failed: "
77 << logging::SystemErrorCodeToString(hr);
78 return;
79 }
80
81 ComPtr<ABI::Windows::Globalization::ILanguage> language;
82 hr = language_factory->CreateLanguage(language_hstring.get(), &language);
83 if (FAILED(hr)) {
84 DLOG(ERROR) << "Create language failed: "
85 << logging::SystemErrorCodeToString(hr);
86 return;
87 }
88
89 ComPtr<IOcrEngineStatics> engine_factory;
90 hr = GetActivationFactory<IOcrEngineStatics,
91 RuntimeClass_Windows_Media_Ocr_OcrEngine>(
92 &engine_factory);
93 if (FAILED(hr)) {
94 DLOG(ERROR) << "IOcrEngineStatics factory failed: "
95 << logging::SystemErrorCodeToString(hr);
96 return;
97 }
98
99 boolean is_supported = false;
100 hr = engine_factory->IsLanguageSupported(language.Get(), &is_supported);
101 if (FAILED(hr) || !is_supported)
102 return;
103
104 ComPtr<IOcrEngine> ocr_engine;
105 hr = engine_factory->TryCreateFromLanguage(language.Get(), &ocr_engine);
106 if (FAILED(hr)) {
107 DLOG(ERROR) << "Create engine failed from language: "
108 << logging::SystemErrorCodeToString(hr);
109 return;
110 }
111
112 ComPtr<ISoftwareBitmapStatics> bitmap_factory;
113 hr = GetActivationFactory<
114 ISoftwareBitmapStatics,
115 RuntimeClass_Windows_Graphics_Imaging_SoftwareBitmap>(&bitmap_factory);
116 if (FAILED(hr)) {
117 DLOG(ERROR) << "ISoftwareBitmapStatics factory failed: "
118 << logging::SystemErrorCodeToString(hr);
119 return;
120 }
121
122 auto impl = std::make_unique<TextDetectionImplWin>(std::move(ocr_engine),
123 std::move(bitmap_factory));
124 auto* impl_ptr = impl.get();
125 impl_ptr->SetReceiver(
126 mojo::MakeSelfOwnedReceiver(std::move(impl), std::move(receiver)));
127 }
128
TextDetectionImplWin(ComPtr<IOcrEngine> ocr_engine,ComPtr<ISoftwareBitmapStatics> bitmap_factory)129 TextDetectionImplWin::TextDetectionImplWin(
130 ComPtr<IOcrEngine> ocr_engine,
131 ComPtr<ISoftwareBitmapStatics> bitmap_factory)
132 : ocr_engine_(std::move(ocr_engine)),
133 bitmap_factory_(std::move(bitmap_factory)) {
134 DCHECK(ocr_engine_);
135 DCHECK(bitmap_factory_);
136 }
137
138 TextDetectionImplWin::~TextDetectionImplWin() = default;
139
Detect(const SkBitmap & bitmap,DetectCallback callback)140 void TextDetectionImplWin::Detect(const SkBitmap& bitmap,
141 DetectCallback callback) {
142 if (FAILED(BeginDetect(bitmap))) {
143 // No detection taking place; run |callback| with an empty array of results.
144 std::move(callback).Run(std::vector<mojom::TextDetectionResultPtr>());
145 return;
146 }
147 // Hold on the callback until AsyncOperation completes.
148 recognize_text_callback_ = std::move(callback);
149 // This prevents the Detect function from being called before the
150 // AsyncOperation completes.
151 receiver_->PauseIncomingMethodCallProcessing();
152 }
153
BeginDetect(const SkBitmap & bitmap)154 HRESULT TextDetectionImplWin::BeginDetect(const SkBitmap& bitmap) {
155 ComPtr<ISoftwareBitmap> win_bitmap =
156 CreateWinBitmapFromSkBitmap(bitmap, bitmap_factory_.Get());
157 if (!win_bitmap)
158 return E_FAIL;
159
160 // Recognize text asynchronously.
161 ComPtr<IAsyncOperation<OcrResult*>> async_op;
162 const HRESULT hr = ocr_engine_->RecognizeAsync(win_bitmap.Get(), &async_op);
163 if (FAILED(hr)) {
164 DLOG(ERROR) << "Recognize text asynchronously failed: "
165 << logging::SystemErrorCodeToString(hr);
166 return hr;
167 }
168
169 // Use WeakPtr to bind the callback so that the once callback will not be run
170 // if this object has been already destroyed. |win_bitmap| needs to be kept
171 // alive until OnTextDetected().
172 return base::win::PostAsyncResults(
173 std::move(async_op),
174 base::BindOnce(&TextDetectionImplWin::OnTextDetected,
175 weak_factory_.GetWeakPtr(), std::move(win_bitmap)));
176 }
177
178 std::vector<mojom::TextDetectionResultPtr>
BuildTextDetectionResult(ComPtr<IOcrResult> ocr_result)179 TextDetectionImplWin::BuildTextDetectionResult(ComPtr<IOcrResult> ocr_result) {
180 std::vector<mojom::TextDetectionResultPtr> results;
181 if (!ocr_result)
182 return results;
183
184 ComPtr<IVectorView<OcrLine*>> ocr_lines;
185 HRESULT hr = ocr_result->get_Lines(&ocr_lines);
186 if (FAILED(hr)) {
187 DLOG(ERROR) << "Get Lines failed: " << logging::SystemErrorCodeToString(hr);
188 return results;
189 }
190
191 uint32_t count;
192 hr = ocr_lines->get_Size(&count);
193 if (FAILED(hr)) {
194 DLOG(ERROR) << "get_Size failed: " << logging::SystemErrorCodeToString(hr);
195 return results;
196 }
197
198 results.reserve(count);
199 for (uint32_t i = 0; i < count; ++i) {
200 ComPtr<IOcrLine> line;
201 hr = ocr_lines->GetAt(i, &line);
202 if (FAILED(hr))
203 break;
204
205 HSTRING text;
206 hr = line->get_Text(&text);
207 if (FAILED(hr))
208 break;
209
210 // Gets bounding box with the words detected in the current line of Text.
211 ComPtr<IVectorView<OcrWord*>> ocr_words;
212 hr = line->get_Words(&ocr_words);
213 if (FAILED(hr))
214 break;
215
216 uint32_t words_count;
217 hr = ocr_words->get_Size(&words_count);
218 if (FAILED(hr))
219 break;
220
221 auto result = shape_detection::mojom::TextDetectionResult::New();
222 for (uint32_t i = 0; i < words_count; ++i) {
223 ComPtr<IOcrWord> word;
224 hr = ocr_words->GetAt(i, &word);
225 if (FAILED(hr))
226 break;
227
228 ABI::Windows::Foundation::Rect bounds;
229 hr = word->get_BoundingRect(&bounds);
230 if (FAILED(hr))
231 break;
232
233 result->bounding_box = gfx::UnionRects(
234 result->bounding_box,
235 gfx::RectF(bounds.X, bounds.Y, bounds.Width, bounds.Height));
236 }
237
238 result->raw_value = ScopedHString(text).GetAsUTF8();
239 results.push_back(std::move(result));
240 }
241 return results;
242 }
243
244 // |win_bitmap| is passed here so that it is kept alive until the AsyncOperation
245 // completes because RecognizeAsync does not hold a reference.
OnTextDetected(ComPtr<ISoftwareBitmap>,ComPtr<IOcrResult> ocr_result)246 void TextDetectionImplWin::OnTextDetected(
247 ComPtr<ISoftwareBitmap> /* win_bitmap */,
248 ComPtr<IOcrResult> ocr_result) {
249 std::move(recognize_text_callback_)
250 .Run(BuildTextDetectionResult(std::move(ocr_result)));
251 receiver_->ResumeIncomingMethodCallProcessing();
252 }
253
254 } // namespace shape_detection
255