1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include "gui/base/msime_user_dictionary_importer.h"
31
32 #ifdef OS_WIN
33 #include <windows.h>
34
35 // In general, mixing different NTDDI_VERSION/_WIN32_WINNT values in a single
36 // executable file is not safe, but <msime.h> requires NTDDI_WIN8 to use COM
37 // interfaces and constants defined there, even though those APIs are available
38 // on older platforms such as Windows 7.
39 // To work around this limitation, here we intentionally re-define those macros.
40 // TODO(yukawa): Remove the following hack when we stop supporting Windows 7.
41
42 // Redefine NTDDI_VERSION with NTDDI_WIN8
43 #ifdef NTDDI_VERSION
44 #define MOZC_ORIGINAL_NTDDI_VERSION NTDDI_VERSION
45 #undef NTDDI_VERSION
46 #endif // NTDDI_VERSION
47 #define NTDDI_VERSION 0x06020000 // == NTDDI_WIN8
48
49 // Redefine _WIN32_WINNT with WIN32_WINNT_WIN8
50 #ifdef _WIN32_WINNT
51 #define MOZC_ORIGINAL_WIN32_WINNT _WIN32_WINNT
52 #undef _WIN32_WINNT
#endif  // _WIN32_WINNT
54 #define _WIN32_WINNT 0x0602 // == WIN32_WINNT_WIN8
55
56 #include <msime.h>
57
58 // Restore NTDDI_VERSION
59 #ifdef MOZC_ORIGINAL_NTDDI_VERSION
60 #undef NTDDI_VERSION
61 #define NTDDI_VERSION MOZC_ORIGINAL_NTDDI_VERSION
62 #endif // MOZC_ORIGINAL_NTDDI_VERSION
63
64 // Restore _WIN32_WINNT
65 #ifdef MOZC_ORIGINAL_WIN32_WINNT
66 #undef _WIN32_WINNT
67 #define _WIN32_WINNT MOZC_ORIGINAL_WIN32_WINNT
68 #endif // MOZC_ORIGINAL_WIN32_WINNT
69
70 #include <algorithm>
71 #include <map>
72 #include <set>
73 #include <string>
74 #include <vector>
75
76 #include "base/compiler_specific.h"
77 #include "base/hash.h"
78 #include "base/logging.h"
79 #include "base/mmap.h"
80 #include "base/number_util.h"
81 #include "base/port.h"
82 #include "base/system_util.h"
83 #include "base/util.h"
84 #include "base/win_util.h"
85 #include "dictionary/user_dictionary_util.h"
86 #include "gui/base/encoding_util.h"
87
88 namespace mozc {
89
90 using user_dictionary::UserDictionary;
91 using user_dictionary::UserDictionaryCommandStatus;
92
93 namespace {
94
95 const size_t kBufferSize = 256;
96
97 // ProgID of MS-IME Japanese.
98 const wchar_t kVersionIndependentProgIdForMSIME[] = L"MSIME.Japan";
99
100 // Interface identifier of user dictionary in MS-IME.
101 // {019F7153-E6DB-11d0-83C3-00C04FDDB82E}
102 const GUID kIidIFEDictionary = {
103 0x19f7153, 0xe6db, 0x11d0, {0x83, 0xc3, 0x0, 0xc0, 0x4f, 0xdd, 0xb8, 0x2e}
104 };
105
CreateIFEDictionary()106 IFEDictionary *CreateIFEDictionary() {
107 CLSID class_id = GUID_NULL;
108 // On Windows 7 and prior, multiple versions of MS-IME can be installed
109 // side-by-side. As far as we've observed, the latest version will be chosen
110 // with version-independent ProgId.
111 HRESULT result = ::CLSIDFromProgID(kVersionIndependentProgIdForMSIME,
112 &class_id);
113 if (FAILED(result)) {
114 LOG(ERROR) << "CLSIDFromProgID() failed: " << result;
115 return nullptr;
116 }
117 IFEDictionary *obj = nullptr;
118 result = ::CoCreateInstance(class_id,
119 nullptr,
120 CLSCTX_INPROC_SERVER,
121 kIidIFEDictionary,
122 reinterpret_cast<void **>(&obj));
123 if (FAILED(result)) {
124 LOG(ERROR) << "CoCreateInstance() failed: " << result;
125 return nullptr;
126 }
127 VLOG(1) << "Can create IFEDictionary successfully";
128 return obj;
129 }
130
131 class ScopedIFEDictionary {
132 public:
ScopedIFEDictionary(IFEDictionary * dic)133 explicit ScopedIFEDictionary(IFEDictionary *dic)
134 : dic_(dic) {}
135
~ScopedIFEDictionary()136 ~ScopedIFEDictionary() {
137 if (dic_ != NULL) {
138 dic_->Close();
139 dic_->Release();
140 }
141 }
142
operator *() const143 IFEDictionary & operator*() const { return *dic_; }
operator ->() const144 IFEDictionary* operator->() const { return dic_; }
get() const145 IFEDictionary* get() const { return dic_; }
146
147 private:
148 IFEDictionary *dic_;
149 };
150
151 // Iterator for MS-IME user dictionary
152 class MSIMEImportIterator
153 : public UserDictionaryImporter::InputIteratorInterface {
154 public:
MSIMEImportIterator()155 MSIMEImportIterator()
156 : dic_(CreateIFEDictionary()),
157 buf_(kBufferSize), result_(E_FAIL), size_(0), index_(0) {
158 if (dic_.get() == NULL) {
159 LOG(ERROR) << "IFEDictionaryFactory returned NULL";
160 return;
161 }
162
163 // open user dictionary
164 HRESULT result = dic_->Open(NULL, NULL);
165 if (S_OK != result) {
166 LOG(ERROR) << "Cannot open user dictionary: " << result_;
167 return;
168 }
169
170 POSTBL *pos_table = NULL;
171 int pos_size = 0;
172 result_ = dic_->GetPosTable(&pos_table, &pos_size);
173 if (S_OK != result_ || pos_table == NULL || pos_size == 0) {
174 LOG(ERROR) << "Cannot get POS table: " << result;
175 result_ = E_FAIL;
176 return;
177 }
178
179 string name;
180 for (int i = 0; i < pos_size; ++i) {
181 EncodingUtil::SJISToUTF8(
182 reinterpret_cast<char *>(pos_table->szName), &name);
183 pos_map_.insert(std::make_pair(pos_table->nPos, name));
184 ++pos_table;
185 }
186
187 // extract all words registered by user.
188 // Don't use auto-registered words, since Mozc may not be able to
189 // handle auto_registered words correctly, and user is basically
190 // unaware of auto-registered words.
191 result_ = dic_->GetWords(NULL, NULL, NULL,
192 IFED_POS_ALL,
193 IFED_SELECT_ALL,
194 IFED_REG_USER, // | FED_REG_AUTO
195 reinterpret_cast<UCHAR *>(&buf_[0]),
196 kBufferSize * sizeof(IMEWRD),
197 &size_);
198 }
199
IsAvailable() const200 bool IsAvailable() const {
201 return result_ == IFED_S_MORE_ENTRIES || result_ == S_OK;
202 }
203
204 // NOTE: Without "UserDictionaryImporter::", Visual C++ 2008 somehow fails
205 // to look up the type name.
Next(UserDictionaryImporter::RawEntry * entry)206 bool Next(UserDictionaryImporter::RawEntry *entry) {
207 if (!IsAvailable()) {
208 LOG(ERROR) << "Iterator is not available";
209 return false;
210 }
211
212 if (entry == NULL) {
213 LOG(ERROR) << "Entry is NULL";
214 return false;
215 }
216 entry->Clear();
217
218 if (index_ < size_) {
219 if (buf_[index_].pwchReading == NULL ||
220 buf_[index_].pwchDisplay == NULL) {
221 ++index_;
222 LOG(ERROR) << "pwchDisplay or pwchReading is NULL";
223 return true;
224 }
225
226 // set key/value
227 Util::WideToUTF8(buf_[index_].pwchReading, &entry->key);
228 Util::WideToUTF8(buf_[index_].pwchDisplay, &entry->value);
229
230 // set POS
231 std::map<int, string>::const_iterator it =
232 pos_map_.find(buf_[index_].nPos1);
233 if (it == pos_map_.end()) {
234 ++index_;
235 LOG(ERROR) << "Unknown POS id: " << buf_[index_].nPos1;
236 entry->Clear();
237 return true;
238 }
239 entry->pos = it->second;
240
241 // set comment
242 if (buf_[index_].pvComment != NULL) {
243 if (buf_[index_].uct == IFED_UCT_STRING_SJIS) {
244 EncodingUtil::SJISToUTF8(
245 reinterpret_cast<const char *>(buf_[index_].pvComment),
246 &entry->comment);
247 } else if (buf_[index_].uct == IFED_UCT_STRING_UNICODE) {
248 Util::WideToUTF8(
249 reinterpret_cast<const wchar_t *>(buf_[index_].pvComment),
250 &entry->comment);
251 }
252 }
253 }
254
255 if (index_ < size_) {
256 ++index_;
257 return true;
258 } else if (result_ == S_OK) {
259 return false;
260 } else if (result_ == IFED_S_MORE_ENTRIES) {
261 result_ = dic_->NextWords(reinterpret_cast<UCHAR *>(&buf_[0]),
262 kBufferSize * sizeof(IMEWRD),
263 &size_);
264 if (result_ == E_FAIL) {
265 LOG(ERROR) << "NextWords() failed";
266 return false;
267 }
268 index_ = 0;
269 return true;
270 }
271
272 return false;
273 }
274
275 private:
276 std::vector<IMEWRD> buf_;
277 ScopedIFEDictionary dic_;
278 std::map<int, string> pos_map_;
279 HRESULT result_;
280 ULONG size_;
281 ULONG index_;
282 };
283
284 } // namespace
285
286 namespace gui {
287
288 UserDictionaryImporter::InputIteratorInterface *
Create()289 MSIMEUserDictionarImporter::Create() {
290 return new MSIMEImportIterator;
291 }
292
293 } // namespace gui
294 } // namespace mozc
295
296 #else // OS_WIN
297
298 namespace mozc {
299 namespace gui {
300
301 UserDictionaryImporter::InputIteratorInterface *
Create()302 MSIMEUserDictionarImporter::Create() {
303 return nullptr;
304 }
305
306 } // namespace gui
307 } // namespace mozc
308
309 #endif // OS_WIN
310