1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "services/shape_detection/text_detection_impl_win.h"
6 
7 #include <windows.foundation.collections.h>
8 #include <windows.globalization.h>
9 #include <memory>
10 #include <string>
11 
12 #include "base/bind.h"
13 #include "base/logging.h"
14 #include "base/win/core_winrt_util.h"
15 #include "base/win/post_async_results.h"
16 #include "base/win/scoped_hstring.h"
17 #include "base/win/windows_version.h"
18 #include "mojo/public/cpp/bindings/pending_receiver.h"
19 #include "mojo/public/cpp/bindings/self_owned_receiver.h"
20 #include "services/shape_detection/detection_utils_win.h"
21 #include "services/shape_detection/text_detection_impl.h"
22 #include "ui/gfx/geometry/rect_f.h"
23 
24 namespace shape_detection {
25 
26 using ABI::Windows::Foundation::IAsyncOperation;
27 using ABI::Windows::Foundation::Collections::IVectorView;
28 using ABI::Windows::Globalization::ILanguageFactory;
29 using ABI::Windows::Graphics::Imaging::ISoftwareBitmap;
30 using ABI::Windows::Graphics::Imaging::ISoftwareBitmapStatics;
31 using ABI::Windows::Media::Ocr::IOcrEngine;
32 using ABI::Windows::Media::Ocr::IOcrEngineStatics;
33 using ABI::Windows::Media::Ocr::IOcrLine;
34 using ABI::Windows::Media::Ocr::IOcrResult;
35 using ABI::Windows::Media::Ocr::IOcrWord;
36 using ABI::Windows::Media::Ocr::OcrLine;
37 using ABI::Windows::Media::Ocr::OcrResult;
38 using ABI::Windows::Media::Ocr::OcrWord;
39 using base::win::GetActivationFactory;
40 using base::win::ScopedHString;
41 using Microsoft::WRL::ComPtr;
42 
43 // static
Create(mojo::PendingReceiver<mojom::TextDetection> receiver)44 void TextDetectionImpl::Create(
45     mojo::PendingReceiver<mojom::TextDetection> receiver) {
46   // OcrEngine class is only available in Win 10 onwards (v10.0.10240.0) that
47   // documents in
48   // https://docs.microsoft.com/en-us/uwp/api/windows.media.ocr.ocrengine.
49   if (base::win::GetVersion() < base::win::Version::WIN10) {
50     DVLOG(1) << "Optical character recognition not supported before Windows 10";
51     return;
52   }
53   DCHECK_GE(base::win::OSInfo::GetInstance()->version_number().build, 10240);
54 
55   // Loads functions dynamically at runtime to prevent library dependencies.
56   if (!(base::win::ResolveCoreWinRTDelayload() &&
57         ScopedHString::ResolveCoreWinRTStringDelayload())) {
58     DLOG(ERROR) << "Failed loading functions from combase.dll";
59     return;
60   }
61 
62   // Text Detection specification only supports Latin-1 text as documented in
63   // https://wicg.github.io/shape-detection-api/text.html#text-detection-api.
64   // TODO(junwei.fu): https://crbug.com/794097 consider supporting other Latin
65   // script language.
66   ScopedHString language_hstring = ScopedHString::Create("en");
67   if (!language_hstring.is_valid())
68     return;
69 
70   ComPtr<ILanguageFactory> language_factory;
71   HRESULT hr =
72       GetActivationFactory<ILanguageFactory,
73                            RuntimeClass_Windows_Globalization_Language>(
74           &language_factory);
75   if (FAILED(hr)) {
76     DLOG(ERROR) << "ILanguage factory failed: "
77                 << logging::SystemErrorCodeToString(hr);
78     return;
79   }
80 
81   ComPtr<ABI::Windows::Globalization::ILanguage> language;
82   hr = language_factory->CreateLanguage(language_hstring.get(), &language);
83   if (FAILED(hr)) {
84     DLOG(ERROR) << "Create language failed: "
85                 << logging::SystemErrorCodeToString(hr);
86     return;
87   }
88 
89   ComPtr<IOcrEngineStatics> engine_factory;
90   hr = GetActivationFactory<IOcrEngineStatics,
91                             RuntimeClass_Windows_Media_Ocr_OcrEngine>(
92       &engine_factory);
93   if (FAILED(hr)) {
94     DLOG(ERROR) << "IOcrEngineStatics factory failed: "
95                 << logging::SystemErrorCodeToString(hr);
96     return;
97   }
98 
99   boolean is_supported = false;
100   hr = engine_factory->IsLanguageSupported(language.Get(), &is_supported);
101   if (FAILED(hr) || !is_supported)
102     return;
103 
104   ComPtr<IOcrEngine> ocr_engine;
105   hr = engine_factory->TryCreateFromLanguage(language.Get(), &ocr_engine);
106   if (FAILED(hr)) {
107     DLOG(ERROR) << "Create engine failed from language: "
108                 << logging::SystemErrorCodeToString(hr);
109     return;
110   }
111 
112   ComPtr<ISoftwareBitmapStatics> bitmap_factory;
113   hr = GetActivationFactory<
114       ISoftwareBitmapStatics,
115       RuntimeClass_Windows_Graphics_Imaging_SoftwareBitmap>(&bitmap_factory);
116   if (FAILED(hr)) {
117     DLOG(ERROR) << "ISoftwareBitmapStatics factory failed: "
118                 << logging::SystemErrorCodeToString(hr);
119     return;
120   }
121 
122   auto impl = std::make_unique<TextDetectionImplWin>(std::move(ocr_engine),
123                                                      std::move(bitmap_factory));
124   auto* impl_ptr = impl.get();
125   impl_ptr->SetReceiver(
126       mojo::MakeSelfOwnedReceiver(std::move(impl), std::move(receiver)));
127 }
128 
TextDetectionImplWin(ComPtr<IOcrEngine> ocr_engine,ComPtr<ISoftwareBitmapStatics> bitmap_factory)129 TextDetectionImplWin::TextDetectionImplWin(
130     ComPtr<IOcrEngine> ocr_engine,
131     ComPtr<ISoftwareBitmapStatics> bitmap_factory)
132     : ocr_engine_(std::move(ocr_engine)),
133       bitmap_factory_(std::move(bitmap_factory)) {
134   DCHECK(ocr_engine_);
135   DCHECK(bitmap_factory_);
136 }
137 
138 TextDetectionImplWin::~TextDetectionImplWin() = default;
139 
Detect(const SkBitmap & bitmap,DetectCallback callback)140 void TextDetectionImplWin::Detect(const SkBitmap& bitmap,
141                                   DetectCallback callback) {
142   if (FAILED(BeginDetect(bitmap))) {
143     // No detection taking place; run |callback| with an empty array of results.
144     std::move(callback).Run(std::vector<mojom::TextDetectionResultPtr>());
145     return;
146   }
147   // Hold on the callback until AsyncOperation completes.
148   recognize_text_callback_ = std::move(callback);
149   // This prevents the Detect function from being called before the
150   // AsyncOperation completes.
151   receiver_->PauseIncomingMethodCallProcessing();
152 }
153 
BeginDetect(const SkBitmap & bitmap)154 HRESULT TextDetectionImplWin::BeginDetect(const SkBitmap& bitmap) {
155   ComPtr<ISoftwareBitmap> win_bitmap =
156       CreateWinBitmapFromSkBitmap(bitmap, bitmap_factory_.Get());
157   if (!win_bitmap)
158     return E_FAIL;
159 
160   // Recognize text asynchronously.
161   ComPtr<IAsyncOperation<OcrResult*>> async_op;
162   const HRESULT hr = ocr_engine_->RecognizeAsync(win_bitmap.Get(), &async_op);
163   if (FAILED(hr)) {
164     DLOG(ERROR) << "Recognize text asynchronously failed: "
165                 << logging::SystemErrorCodeToString(hr);
166     return hr;
167   }
168 
169   // Use WeakPtr to bind the callback so that the once callback will not be run
170   // if this object has been already destroyed. |win_bitmap| needs to be kept
171   // alive until OnTextDetected().
172   return base::win::PostAsyncResults(
173       std::move(async_op),
174       base::BindOnce(&TextDetectionImplWin::OnTextDetected,
175                      weak_factory_.GetWeakPtr(), std::move(win_bitmap)));
176 }
177 
178 std::vector<mojom::TextDetectionResultPtr>
BuildTextDetectionResult(ComPtr<IOcrResult> ocr_result)179 TextDetectionImplWin::BuildTextDetectionResult(ComPtr<IOcrResult> ocr_result) {
180   std::vector<mojom::TextDetectionResultPtr> results;
181   if (!ocr_result)
182     return results;
183 
184   ComPtr<IVectorView<OcrLine*>> ocr_lines;
185   HRESULT hr = ocr_result->get_Lines(&ocr_lines);
186   if (FAILED(hr)) {
187     DLOG(ERROR) << "Get Lines failed: " << logging::SystemErrorCodeToString(hr);
188     return results;
189   }
190 
191   uint32_t count;
192   hr = ocr_lines->get_Size(&count);
193   if (FAILED(hr)) {
194     DLOG(ERROR) << "get_Size failed: " << logging::SystemErrorCodeToString(hr);
195     return results;
196   }
197 
198   results.reserve(count);
199   for (uint32_t i = 0; i < count; ++i) {
200     ComPtr<IOcrLine> line;
201     hr = ocr_lines->GetAt(i, &line);
202     if (FAILED(hr))
203       break;
204 
205     HSTRING text;
206     hr = line->get_Text(&text);
207     if (FAILED(hr))
208       break;
209 
210     // Gets bounding box with the words detected in the current line of Text.
211     ComPtr<IVectorView<OcrWord*>> ocr_words;
212     hr = line->get_Words(&ocr_words);
213     if (FAILED(hr))
214       break;
215 
216     uint32_t words_count;
217     hr = ocr_words->get_Size(&words_count);
218     if (FAILED(hr))
219       break;
220 
221     auto result = shape_detection::mojom::TextDetectionResult::New();
222     for (uint32_t i = 0; i < words_count; ++i) {
223       ComPtr<IOcrWord> word;
224       hr = ocr_words->GetAt(i, &word);
225       if (FAILED(hr))
226         break;
227 
228       ABI::Windows::Foundation::Rect bounds;
229       hr = word->get_BoundingRect(&bounds);
230       if (FAILED(hr))
231         break;
232 
233       result->bounding_box = gfx::UnionRects(
234           result->bounding_box,
235           gfx::RectF(bounds.X, bounds.Y, bounds.Width, bounds.Height));
236     }
237 
238     result->raw_value = ScopedHString(text).GetAsUTF8();
239     results.push_back(std::move(result));
240   }
241   return results;
242 }
243 
244 // |win_bitmap| is passed here so that it is kept alive until the AsyncOperation
245 // completes because RecognizeAsync does not hold a reference.
OnTextDetected(ComPtr<ISoftwareBitmap>,ComPtr<IOcrResult> ocr_result)246 void TextDetectionImplWin::OnTextDetected(
247     ComPtr<ISoftwareBitmap> /* win_bitmap */,
248     ComPtr<IOcrResult> ocr_result) {
249   std::move(recognize_text_callback_)
250       .Run(BuildTextDetectionResult(std::move(ocr_result)));
251   receiver_->ResumeIncomingMethodCallProcessing();
252 }
253 
254 }  // namespace shape_detection
255