1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "gui/base/msime_user_dictionary_importer.h"
31 
32 #ifdef OS_WIN
33 #include <windows.h>
34 
35 // In general, mixing different NTDDI_VERSION/_WIN32_WINNT values in a single
36 // executable file is not safe, but <msime.h> requires NTDDI_WIN8 to use COM
37 // interfaces and constants defined there, even though those APIs are available
38 // on older platforms such as Windows 7.
39 // To work around this limitation, here we intentionally re-define those macros.
40 // TODO(yukawa): Remove the following hack when we stop supporting Windows 7.
41 
42 // Redefine NTDDI_VERSION with NTDDI_WIN8
43 #ifdef NTDDI_VERSION
44 #define MOZC_ORIGINAL_NTDDI_VERSION NTDDI_VERSION
45 #undef NTDDI_VERSION
46 #endif  // NTDDI_VERSION
47 #define NTDDI_VERSION 0x06020000  // == NTDDI_WIN8
48 
49 // Redefine _WIN32_WINNT with WIN32_WINNT_WIN8
50 #ifdef _WIN32_WINNT
51 #define MOZC_ORIGINAL_WIN32_WINNT _WIN32_WINNT
52 #undef _WIN32_WINNT
53 #endif  // MOZC_ORIGINAL_WIN32_WINNT
54 #define _WIN32_WINNT 0x0602       // == WIN32_WINNT_WIN8
55 
56 #include <msime.h>
57 
58 // Restore NTDDI_VERSION
59 #ifdef MOZC_ORIGINAL_NTDDI_VERSION
60 #undef NTDDI_VERSION
61 #define NTDDI_VERSION MOZC_ORIGINAL_NTDDI_VERSION
62 #endif  // MOZC_ORIGINAL_NTDDI_VERSION
63 
64 // Restore _WIN32_WINNT
65 #ifdef MOZC_ORIGINAL_WIN32_WINNT
66 #undef _WIN32_WINNT
67 #define _WIN32_WINNT MOZC_ORIGINAL_WIN32_WINNT
68 #endif  // MOZC_ORIGINAL_WIN32_WINNT
69 
70 #include <algorithm>
71 #include <map>
72 #include <set>
73 #include <string>
74 #include <vector>
75 
76 #include "base/compiler_specific.h"
77 #include "base/hash.h"
78 #include "base/logging.h"
79 #include "base/mmap.h"
80 #include "base/number_util.h"
81 #include "base/port.h"
82 #include "base/system_util.h"
83 #include "base/util.h"
84 #include "base/win_util.h"
85 #include "dictionary/user_dictionary_util.h"
86 #include "gui/base/encoding_util.h"
87 
88 namespace mozc {
89 
90 using user_dictionary::UserDictionary;
91 using user_dictionary::UserDictionaryCommandStatus;
92 
93 namespace {
94 
95 const size_t kBufferSize = 256;
96 
97 // ProgID of MS-IME Japanese.
98 const wchar_t kVersionIndependentProgIdForMSIME[] = L"MSIME.Japan";
99 
100 // Interface identifier of user dictionary in MS-IME.
101 // {019F7153-E6DB-11d0-83C3-00C04FDDB82E}
102 const GUID kIidIFEDictionary = {
103   0x19f7153, 0xe6db, 0x11d0, {0x83, 0xc3, 0x0, 0xc0, 0x4f, 0xdd, 0xb8, 0x2e}
104 };
105 
CreateIFEDictionary()106 IFEDictionary *CreateIFEDictionary() {
107   CLSID class_id = GUID_NULL;
108   // On Windows 7 and prior, multiple versions of MS-IME can be installed
109   // side-by-side. As far as we've observed, the latest version will be chosen
110   // with version-independent ProgId.
111   HRESULT result = ::CLSIDFromProgID(kVersionIndependentProgIdForMSIME,
112                                      &class_id);
113   if (FAILED(result)) {
114     LOG(ERROR) << "CLSIDFromProgID() failed: " << result;
115     return nullptr;
116   }
117   IFEDictionary *obj = nullptr;
118   result = ::CoCreateInstance(class_id,
119                               nullptr,
120                               CLSCTX_INPROC_SERVER,
121                               kIidIFEDictionary,
122                               reinterpret_cast<void **>(&obj));
123   if (FAILED(result)) {
124     LOG(ERROR) << "CoCreateInstance() failed: " << result;
125     return nullptr;
126   }
127   VLOG(1) << "Can create IFEDictionary successfully";
128   return obj;
129 }
130 
131 class ScopedIFEDictionary {
132  public:
ScopedIFEDictionary(IFEDictionary * dic)133   explicit ScopedIFEDictionary(IFEDictionary *dic)
134       : dic_(dic) {}
135 
~ScopedIFEDictionary()136   ~ScopedIFEDictionary() {
137     if (dic_ != NULL) {
138       dic_->Close();
139       dic_->Release();
140     }
141   }
142 
operator *() const143   IFEDictionary & operator*() const { return *dic_; }
operator ->() const144   IFEDictionary* operator->() const { return dic_; }
get() const145   IFEDictionary* get() const { return dic_; }
146 
147  private:
148   IFEDictionary *dic_;
149 };
150 
151 // Iterator for MS-IME user dictionary
152 class MSIMEImportIterator
153     : public UserDictionaryImporter::InputIteratorInterface {
154  public:
MSIMEImportIterator()155   MSIMEImportIterator()
156       : dic_(CreateIFEDictionary()),
157         buf_(kBufferSize), result_(E_FAIL), size_(0), index_(0) {
158     if (dic_.get() == NULL) {
159       LOG(ERROR) << "IFEDictionaryFactory returned NULL";
160       return;
161     }
162 
163     // open user dictionary
164     HRESULT result = dic_->Open(NULL, NULL);
165     if (S_OK != result) {
166       LOG(ERROR) << "Cannot open user dictionary: " << result_;
167       return;
168     }
169 
170     POSTBL *pos_table = NULL;
171     int pos_size = 0;
172     result_ = dic_->GetPosTable(&pos_table, &pos_size);
173     if (S_OK != result_ || pos_table == NULL || pos_size == 0) {
174       LOG(ERROR) << "Cannot get POS table: " << result;
175       result_ = E_FAIL;
176       return;
177     }
178 
179     string name;
180     for (int i = 0; i < pos_size; ++i) {
181       EncodingUtil::SJISToUTF8(
182           reinterpret_cast<char *>(pos_table->szName), &name);
183       pos_map_.insert(std::make_pair(pos_table->nPos, name));
184       ++pos_table;
185     }
186 
187     // extract all words registered by user.
188     // Don't use auto-registered words, since Mozc may not be able to
189     // handle auto_registered words correctly, and user is basically
190     // unaware of auto-registered words.
191     result_ = dic_->GetWords(NULL, NULL, NULL,
192                              IFED_POS_ALL,
193                              IFED_SELECT_ALL,
194                              IFED_REG_USER,  // | FED_REG_AUTO
195                              reinterpret_cast<UCHAR *>(&buf_[0]),
196                              kBufferSize * sizeof(IMEWRD),
197                              &size_);
198   }
199 
IsAvailable() const200   bool IsAvailable() const {
201     return result_ == IFED_S_MORE_ENTRIES || result_ == S_OK;
202   }
203 
204   // NOTE: Without "UserDictionaryImporter::", Visual C++ 2008 somehow fails
205   //     to look up the type name.
Next(UserDictionaryImporter::RawEntry * entry)206   bool Next(UserDictionaryImporter::RawEntry *entry) {
207     if (!IsAvailable()) {
208       LOG(ERROR) << "Iterator is not available";
209       return false;
210     }
211 
212     if (entry == NULL) {
213       LOG(ERROR) << "Entry is NULL";
214       return false;
215     }
216     entry->Clear();
217 
218     if (index_ < size_) {
219       if (buf_[index_].pwchReading == NULL ||
220           buf_[index_].pwchDisplay == NULL) {
221         ++index_;
222         LOG(ERROR) << "pwchDisplay or pwchReading is NULL";
223         return true;
224       }
225 
226       // set key/value
227       Util::WideToUTF8(buf_[index_].pwchReading, &entry->key);
228       Util::WideToUTF8(buf_[index_].pwchDisplay, &entry->value);
229 
230       // set POS
231       std::map<int, string>::const_iterator it =
232           pos_map_.find(buf_[index_].nPos1);
233       if (it == pos_map_.end()) {
234         ++index_;
235         LOG(ERROR) << "Unknown POS id: " << buf_[index_].nPos1;
236         entry->Clear();
237         return true;
238       }
239       entry->pos = it->second;
240 
241       // set comment
242       if (buf_[index_].pvComment != NULL) {
243         if (buf_[index_].uct == IFED_UCT_STRING_SJIS) {
244           EncodingUtil::SJISToUTF8(
245               reinterpret_cast<const char *>(buf_[index_].pvComment),
246               &entry->comment);
247         } else if (buf_[index_].uct == IFED_UCT_STRING_UNICODE) {
248           Util::WideToUTF8(
249               reinterpret_cast<const wchar_t *>(buf_[index_].pvComment),
250               &entry->comment);
251         }
252       }
253     }
254 
255     if (index_ < size_) {
256       ++index_;
257       return true;
258     } else if (result_ == S_OK) {
259       return false;
260     } else if (result_ == IFED_S_MORE_ENTRIES) {
261       result_ = dic_->NextWords(reinterpret_cast<UCHAR *>(&buf_[0]),
262                                 kBufferSize * sizeof(IMEWRD),
263                                &size_);
264       if (result_ == E_FAIL) {
265         LOG(ERROR) << "NextWords() failed";
266         return false;
267       }
268       index_ = 0;
269       return true;
270     }
271 
272     return false;
273   }
274 
275  private:
276   std::vector<IMEWRD> buf_;
277   ScopedIFEDictionary dic_;
278   std::map<int, string> pos_map_;
279   HRESULT result_;
280   ULONG size_;
281   ULONG index_;
282 };
283 
284 }  // namespace
285 
286 namespace gui {
287 
288 UserDictionaryImporter::InputIteratorInterface *
Create()289 MSIMEUserDictionarImporter::Create() {
290   return new MSIMEImportIterator;
291 }
292 
293 }  // namespace gui
294 }  // namespace mozc
295 
296 #else  // OS_WIN
297 
298 namespace mozc {
299 namespace gui {
300 
301 UserDictionaryImporter::InputIteratorInterface *
Create()302 MSIMEUserDictionarImporter::Create() {
303   return nullptr;
304 }
305 
306 }  // namespace gui
307 }  // namespace mozc
308 
309 #endif  // OS_WIN
310