1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* enchant
3 * Copyright (C) 2003-2004 Joan Moratinos <jmo@softcatala.org>, Dom Lachowicz
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the
17 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02110-1301, USA.
19 *
20 * In addition, as a special exception, Dom Lachowicz
21 * gives permission to link the code of this program with
22 * non-LGPL Spelling Provider libraries (eg: a MSFT Office
23 * spell checker backend) and distribute linked combinations including
24 * the two. You must obey the GNU General Public License in all
25 * respects for all of the code used other than said providers. If you modify
26 * this file, you may extend this exception to your version of the
27 * file, but you are not obligated to do so. If you do not wish to
28 * do so, delete this exception statement from your version.
29 */
30
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34
35 #include <string>
36 #include <vector>
37
38 #include "enchant.h"
39 #include "enchant-provider.h"
40
41 #ifdef near
42 #undef near
43 #endif
44
45 /* built against hunspell 1.2.2 on 2008-04-12 */
46
47 #ifdef HUNSPELL_STATIC
48 #include "hunspell.hxx"
49 #else
50 #include <hunspell/hunspell.hxx>
51 #endif
52
53 ENCHANT_PLUGIN_DECLARE("Myspell")
54
/* Value used throughout this file to mark a conversion descriptor as unusable.
 * The expansion is fully parenthesized so the cast cannot bind to operators
 * that surround the macro at its use site. */
#define G_ICONV_INVALID ((GIConv)-1)
56
57 #include <glib.h>
58
59 /***************************************************************************/
60
61 class MySpellChecker
62 {
63 public:
64 MySpellChecker(EnchantBroker * broker);
65 ~MySpellChecker();
66
67 bool checkWord (const char *word, size_t len);
68 char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs);
69
70 bool requestDictionary (const char * szLang);
71
72 private:
73 GIConv m_translate_in; /* Selected translation from/to Unicode */
74 GIConv m_translate_out;
75 Hunspell *myspell;
76 EnchantBroker *m_broker;
77 };
78
79 /***************************************************************************/
80
81 #if defined(_WIN32)
GetRegistryValue(HKEY baseKey,const WCHAR * uKeyName,const WCHAR * uKey)82 static WCHAR* GetRegistryValue(HKEY baseKey, const WCHAR * uKeyName, const WCHAR * uKey)
83 {
84 HKEY hKey;
85 unsigned long lType;
86 DWORD dwSize;
87 WCHAR* wszValue = NULL;
88
89 if(RegOpenKeyExW(baseKey, uKeyName, 0, KEY_READ, &hKey) == ERROR_SUCCESS)
90 {
91 /* Determine size of string */
92 if(RegQueryValueExW( hKey, uKey, NULL, &lType, NULL, &dwSize) == ERROR_SUCCESS)
93 {
94 wszValue = g_new0(WCHAR, dwSize + 1);
95 RegQueryValueExW(hKey, uKey, NULL, &lType, (LPBYTE) wszValue, &dwSize);
96 }
97 }
98
99 return wszValue;
100 }
101
102 static char *
myspell_checker_get_open_office_dicts_dir(void)103 myspell_checker_get_open_office_dicts_dir(void)
104 {
105 WCHAR* wszDirectory;
106 char* open_office_dir, * open_office_dicts_dir;
107
108 /*start by trying current user*/
109 wszDirectory = GetRegistryValue (HKEY_CURRENT_USER, L"Software\\Microsoft\\Windows\\CurrentVersion\\App Paths\\soffice.exe", L"Path");
110 if(wszDirectory == NULL)
111 {
112 /*next try local machine*/
113 wszDirectory = GetRegistryValue (HKEY_LOCAL_MACHINE, L"Software\\Microsoft\\Windows\\CurrentVersion\\App Paths\\soffice.exe", L"Path");
114 }
115
116 if(wszDirectory == NULL)
117 {
118 return NULL;
119 }
120
121 else {
122 open_office_dir = g_utf16_to_utf8 ((gunichar2*)wszDirectory, -1, NULL, NULL, NULL);
123 open_office_dicts_dir = g_build_filename(open_office_dir, "share", "dict", "ooo", NULL);
124 g_free(wszDirectory);
125 g_free(open_office_dir);
126 return open_office_dicts_dir;
127 }
128 }
129 #endif
130
131 static bool
g_iconv_is_valid(GIConv i)132 g_iconv_is_valid(GIConv i)
133 {
134 return (i != G_ICONV_INVALID);
135 }
136
MySpellChecker(EnchantBroker * broker)137 MySpellChecker::MySpellChecker(EnchantBroker * broker)
138 : m_translate_in(G_ICONV_INVALID), m_translate_out(G_ICONV_INVALID), myspell(0), m_broker(broker)
139 {
140 }
141
~MySpellChecker()142 MySpellChecker::~MySpellChecker()
143 {
144 delete myspell;
145 if (g_iconv_is_valid (m_translate_in ))
146 g_iconv_close(m_translate_in);
147 if (g_iconv_is_valid(m_translate_out))
148 g_iconv_close(m_translate_out);
149 }
150
151 bool
checkWord(const char * utf8Word,size_t len)152 MySpellChecker::checkWord(const char *utf8Word, size_t len)
153 {
154 if (len > MAXWORDLEN || !g_iconv_is_valid(m_translate_in))
155 return false;
156
157 // the 8bit encodings use precomposed forms
158 char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
159 char *in = normalizedWord;
160 char word8[MAXWORDLEN + 1];
161 char *out = word8;
162 size_t len_in = strlen(in);
163 size_t len_out = sizeof( word8 ) - 1;
164 size_t result = g_iconv(m_translate_in, &in, &len_in, &out, &len_out);
165 g_free(normalizedWord);
166 if ((size_t)-1 == result)
167 return false;
168 *out = '\0';
169 if (myspell->spell(word8))
170 return true;
171 else
172 return false;
173 }
174
175 char**
suggestWord(const char * const utf8Word,size_t len,size_t * nsug)176 MySpellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsug)
177 {
178 if (len > MAXWORDLEN
179 || !g_iconv_is_valid(m_translate_in)
180 || !g_iconv_is_valid(m_translate_out))
181 return 0;
182
183 // the 8bit encodings use precomposed forms
184 char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
185 char *in = normalizedWord;
186 char word8[MAXWORDLEN + 1];
187 char *out = word8;
188 size_t len_in = strlen(in);
189 size_t len_out = sizeof(word8) - 1;
190 size_t result = g_iconv(m_translate_in, &in, &len_in, &out, &len_out);
191 g_free(normalizedWord);
192 if ((size_t)-1 == result)
193 return NULL;
194
195 *out = '\0';
196 char **sugMS;
197 *nsug = myspell->suggest(&sugMS, word8);
198 if (*nsug > 0) {
199 char **sug = g_new0 (char *, *nsug + 1);
200 for (size_t i=0; i<*nsug; i++) {
201 in = sugMS[i];
202 len_in = strlen(in);
203 len_out = MAXWORDLEN;
204 char *word = g_new0(char, len_out + 1);
205 out = reinterpret_cast<char *>(word);
206 if ((size_t)-1 == g_iconv(m_translate_out, &in, &len_in, &out, &len_out)) {
207 for (size_t j = i; j < *nsug; j++)
208 free(sugMS[j]);
209 free(sugMS);
210
211 *nsug = i;
212 return sug;
213 }
214 *(out) = 0;
215 sug[i] = word;
216 free(sugMS[i]);
217 }
218 free(sugMS);
219 return sug;
220 }
221 else
222 return 0;
223 }
224
225 static GSList *
myspell_checker_get_dictionary_dirs(EnchantBroker * broker)226 myspell_checker_get_dictionary_dirs (EnchantBroker * broker)
227 {
228 GSList *dirs = NULL;
229
230 {
231 GSList *config_dirs, *iter;
232
233 config_dirs = enchant_get_user_config_dirs ();
234
235 for (iter = config_dirs; iter; iter = iter->next)
236 {
237 dirs = g_slist_append (dirs, g_build_filename ((const gchar *)iter->data,
238 "myspell", NULL));
239 }
240
241 g_slist_foreach (config_dirs, (GFunc)g_free, NULL);
242 g_slist_free (config_dirs);
243 }
244
245 {
246 const gchar* const * system_data_dirs = g_get_system_data_dirs ();
247 const gchar* const * iter;
248
249 for (iter = system_data_dirs; *iter; iter++)
250 {
251 dirs = g_slist_append (dirs, g_build_filename (*iter, "myspell", "dicts", NULL));
252 }
253 }
254
255 /* until I work out how to link the modules against enchant in MacOSX - fjf
256 */
257 #ifndef XP_TARGET_COCOA
258 char * myspell_prefix = NULL;
259
260 /* Look for explicitly set registry values */
261 myspell_prefix = enchant_get_registry_value ("Myspell", "Data_Dir");
262 if (myspell_prefix)
263 dirs = g_slist_append (dirs, myspell_prefix);
264
265 /* Dynamically locate library and search for modules relative to it. */
266 char * enchant_prefix = enchant_get_prefix_dir();
267 if(enchant_prefix)
268 {
269 myspell_prefix = g_build_filename(enchant_prefix, "share", "enchant", "myspell", NULL);
270 g_free(enchant_prefix);
271 dirs = g_slist_append (dirs, myspell_prefix);
272 }
273 #endif
274
275 #ifdef ENCHANT_MYSPELL_DICT_DIR
276 dirs = g_slist_append (dirs, g_strdup (ENCHANT_MYSPELL_DICT_DIR));
277 #endif
278
279 #if defined(_WIN32)
280 char* open_office_dicts_dir = myspell_checker_get_open_office_dicts_dir ();
281 if (open_office_dicts_dir)
282 {
283 dirs = g_slist_append (dirs, open_office_dicts_dir);
284 }
285 #endif
286
287 {
288 GSList *config_dirs, *iter;
289
290 config_dirs = enchant_get_dirs_from_param (broker, "enchant.myspell.dictionary.path");
291
292 for (iter = config_dirs; iter; iter = iter->next)
293 {
294 dirs = g_slist_append (dirs, g_strdup ((const gchar *)iter->data));
295 }
296
297 g_slist_foreach (config_dirs, (GFunc)g_free, NULL);
298 g_slist_free (config_dirs);
299 }
300
301 return dirs;
302 }
303
304 static void
s_buildDictionaryDirs(std::vector<std::string> & dirs,EnchantBroker * broker)305 s_buildDictionaryDirs (std::vector<std::string> & dirs, EnchantBroker * broker)
306 {
307 GSList *myspell_dirs, *iter;
308
309 dirs.clear ();
310
311 myspell_dirs = myspell_checker_get_dictionary_dirs (broker);
312 for (iter = myspell_dirs; iter; iter = iter->next)
313 {
314 dirs.push_back ((const char *)iter->data);
315 }
316
317 g_slist_foreach (myspell_dirs, (GFunc)g_free, NULL);
318 g_slist_free (myspell_dirs);
319 }
320
321 static void
s_buildHashNames(std::vector<std::string> & names,EnchantBroker * broker,const char * dict)322 s_buildHashNames (std::vector<std::string> & names, EnchantBroker * broker, const char * dict)
323 {
324 names.clear ();
325
326 std::vector<std::string> dirs;
327 s_buildDictionaryDirs (dirs, broker);
328
329 char *dict_dic = g_strconcat(dict, ".dic", NULL);
330 for (size_t i = 0; i < dirs.size(); i++)
331 {
332 char *tmp = g_build_filename (dirs[i].c_str(), dict_dic, NULL);
333 names.push_back (tmp);
334 g_free (tmp);
335 }
336
337 g_free(dict_dic);
338 }
339
340 static bool
s_hasCorrespondingAffFile(const std::string & dicFile)341 s_hasCorrespondingAffFile(const std::string & dicFile)
342 {
343 std::string aff = dicFile;
344 aff.replace(aff.end()-3,aff.end(), "aff");
345 return g_file_test(aff.c_str(), G_FILE_TEST_EXISTS) != 0;
346 }
347
/*
 * Decides whether a directory entry could be the dictionary file for a
 * language tag: the entry must end in ".dic", start with the tag, and the
 * character right after the tag must be punctuation.
 *
 * dir_entry: file name (no directory part) as a zero-terminated string.
 * tag:       requested language tag as a zero-terminated string.
 * Returns true when the entry is a plausible match.
 */
static bool is_plausible_dict_for_tag(const char *dir_entry, const char *tag)
{
	const char *dic_suffix = ".dic";
	size_t dic_suffix_len = strlen(dic_suffix);
	size_t dir_entry_len = strlen(dir_entry);
	size_t tag_len = strlen(tag);

	// compare by addition: subtracting would wrap around for entries shorter
	// than the suffix and let the suffix check read before the string start
	if (dir_entry_len < dic_suffix_len + tag_len)
		return false;
	if (strcmp(dir_entry+dir_entry_len-dic_suffix_len, dic_suffix) != 0)
		return false;
	if (strncmp (dir_entry, tag, tag_len) != 0)
		return false;
	//e.g. requested dict for "fi",
	//reject "fil_PH.dic"
	//allow "fi-FOO.dic", "fi_FOO.dic", "fi.dic", etc.
	// (the cast keeps bytes >= 0x80 inside the range ispunct() accepts)
	if (!ispunct((unsigned char)dir_entry[tag_len]))
		return false;
	return true;
}
368
369 static char *
myspell_request_dictionary(EnchantBroker * broker,const char * tag)370 myspell_request_dictionary (EnchantBroker * broker, const char * tag)
371 {
372 std::vector<std::string> names;
373
374 s_buildHashNames (names, broker, tag);
375
376 for (size_t i = 0; i < names.size (); i++) {
377 if (g_file_test(names[i].c_str(), G_FILE_TEST_EXISTS)) {
378 if(s_hasCorrespondingAffFile(names[i])){
379 return g_strdup (names[i].c_str());
380 }
381 }
382 }
383
384 std::vector<std::string> dirs;
385 s_buildDictionaryDirs (dirs, broker);
386
387 for (size_t i = 0; i < dirs.size(); i++) {
388 GDir *dir = g_dir_open (dirs[i].c_str(), 0, NULL);
389 if (dir) {
390 const char *dir_entry;
391 while ((dir_entry = g_dir_read_name (dir)) != NULL) {
392 if (is_plausible_dict_for_tag(dir_entry, tag)) {
393 char *dict = g_build_filename (dirs[i].c_str(),
394 dir_entry, NULL);
395 if(s_hasCorrespondingAffFile(dict)){
396 g_dir_close (dir);
397 return dict;
398 }
399 }
400 }
401
402 g_dir_close (dir);
403 }
404 }
405
406 return NULL;
407 }
408
409 bool
requestDictionary(const char * szLang)410 MySpellChecker::requestDictionary(const char *szLang)
411 {
412 char *dic = NULL, *aff = NULL;
413
414 dic = myspell_request_dictionary (m_broker, szLang);
415 if (!dic)
416 return false;
417
418 aff = g_strdup(dic);
419 int len_dic = strlen(dic);
420 strcpy(aff+len_dic-3, "aff");
421 if (g_file_test(aff, G_FILE_TEST_EXISTS))
422 {
423 myspell = new Hunspell(aff, dic);
424 }
425 g_free(dic);
426 g_free(aff);
427 if(myspell == NULL){
428 return false;
429 }
430 const char *enc = myspell->get_dic_encoding();
431
432 m_translate_in = g_iconv_open(enc, "UTF-8");
433 m_translate_out = g_iconv_open("UTF-8", enc);
434
435 return true;
436 }
437
438 /*
439 * Enchant
440 */
441
442 static char **
myspell_dict_suggest(EnchantDict * me,const char * const word,size_t len,size_t * out_n_suggs)443 myspell_dict_suggest (EnchantDict * me, const char *const word,
444 size_t len, size_t * out_n_suggs)
445 {
446 MySpellChecker * checker;
447
448 checker = (MySpellChecker *) me->user_data;
449 return checker->suggestWord (word, len, out_n_suggs);
450 }
451
452 static int
myspell_dict_check(EnchantDict * me,const char * const word,size_t len)453 myspell_dict_check (EnchantDict * me, const char *const word, size_t len)
454 {
455 MySpellChecker * checker;
456
457 checker = (MySpellChecker *) me->user_data;
458
459 if (checker->checkWord(word, len))
460 return 0;
461
462 return 1;
463 }
464
465 static void
myspell_provider_enum_dicts(const char * const directory,std::vector<std::string> & out_dicts)466 myspell_provider_enum_dicts (const char * const directory,
467 std::vector<std::string> & out_dicts)
468 {
469 GDir * dir = g_dir_open (directory, 0, NULL);
470 if (dir) {
471 const char * entry;
472
473 while ((entry = g_dir_read_name (dir)) != NULL) {
474 char * utf8_entry = g_filename_to_utf8 (entry, -1, NULL, NULL, NULL);
475 if (utf8_entry) {
476 std::string dir_entry (utf8_entry);
477 g_free (utf8_entry);
478
479 int hit = dir_entry.rfind (".dic");
480 if (hit != -1) {
481 /* don't include hyphenation dictionaries
482 and require .aff file to be present*/
483 if(dir_entry.compare (0, 5, "hyph_") != 0)
484 {
485 std::string name(dir_entry.substr (0, hit));
486 std::string affFileName(name + ".aff");
487 char * aff = g_build_filename(directory, affFileName.c_str(), NULL);
488 if (g_file_test(aff, G_FILE_TEST_EXISTS))
489 {
490 out_dicts.push_back (dir_entry.substr (0, hit));
491 }
492 g_free(aff);
493 }
494 }
495 }
496 }
497
498 g_dir_close (dir);
499 }
500 }
501
502 extern "C" {
503
504 ENCHANT_MODULE_EXPORT (EnchantProvider *)
505 init_enchant_provider (void);
506
507 static char **
myspell_provider_list_dicts(EnchantProvider * me,size_t * out_n_dicts)508 myspell_provider_list_dicts (EnchantProvider * me,
509 size_t * out_n_dicts)
510 {
511 std::vector<std::string> dict_dirs, dicts;
512 char ** dictionary_list = NULL;
513
514 s_buildDictionaryDirs (dict_dirs, me->owner);
515
516 for (size_t i = 0; i < dict_dirs.size(); i++)
517 {
518 myspell_provider_enum_dicts (dict_dirs[i].c_str(), dicts);
519 }
520
521 if (dicts.size () > 0) {
522 dictionary_list = g_new0 (char *, dicts.size() + 1);
523
524 for (size_t i = 0; i < dicts.size(); i++)
525 dictionary_list[i] = g_strdup (dicts[i].c_str());
526 }
527
528 *out_n_dicts = dicts.size ();
529 return dictionary_list;
530 }
531
532 static void
myspell_provider_free_string_list(EnchantProvider * me,char ** str_list)533 myspell_provider_free_string_list (EnchantProvider * me, char **str_list)
534 {
535 g_strfreev (str_list);
536 }
537
538 static EnchantDict *
myspell_provider_request_dict(EnchantProvider * me,const char * const tag)539 myspell_provider_request_dict(EnchantProvider * me, const char *const tag)
540 {
541 EnchantDict *dict;
542 MySpellChecker * checker;
543
544 checker = new MySpellChecker(me->owner);
545
546 if (!checker)
547 return NULL;
548
549 if (!checker->requestDictionary(tag)) {
550 delete checker;
551 return NULL;
552 }
553
554 dict = g_new0(EnchantDict, 1);
555 dict->user_data = (void *) checker;
556 dict->check = myspell_dict_check;
557 dict->suggest = myspell_dict_suggest;
558 // don't implement personal, session
559
560 return dict;
561 }
562
563 static void
myspell_provider_dispose_dict(EnchantProvider * me,EnchantDict * dict)564 myspell_provider_dispose_dict (EnchantProvider * me, EnchantDict * dict)
565 {
566 MySpellChecker *checker;
567
568 checker = (MySpellChecker *) dict->user_data;
569 delete checker;
570
571 g_free (dict);
572 }
573
574 static int
myspell_provider_dictionary_exists(struct str_enchant_provider * me,const char * const tag)575 myspell_provider_dictionary_exists (struct str_enchant_provider * me,
576 const char *const tag)
577 {
578 std::vector <std::string> names;
579
580 s_buildHashNames (names, me->owner, tag);
581 for (size_t i = 0; i < names.size(); i++) {
582 if (g_file_test (names[i].c_str(), G_FILE_TEST_EXISTS))
583 {
584 std::string aff(names[i]);
585 aff.replace(aff.end() - 3, aff.end(), "aff");
586 if (g_file_test(aff.c_str(), G_FILE_TEST_EXISTS))
587 return 1;
588 }
589 }
590
591 return 0;
592 }
593
594 static void
myspell_provider_dispose(EnchantProvider * me)595 myspell_provider_dispose (EnchantProvider * me)
596 {
597 g_free (me);
598 }
599
600 static const char *
myspell_provider_identify(EnchantProvider * me)601 myspell_provider_identify (EnchantProvider * me)
602 {
603 return "myspell";
604 }
605
606 static const char *
myspell_provider_describe(EnchantProvider * me)607 myspell_provider_describe (EnchantProvider * me)
608 {
609 return "Myspell Provider";
610 }
611
612 EnchantProvider *
init_enchant_provider(void)613 init_enchant_provider (void)
614 {
615 EnchantProvider *provider;
616
617 provider = g_new0(EnchantProvider, 1);
618 provider->dispose = myspell_provider_dispose;
619 provider->request_dict = myspell_provider_request_dict;
620 provider->dispose_dict = myspell_provider_dispose_dict;
621 provider->dictionary_exists = myspell_provider_dictionary_exists;
622 provider->identify = myspell_provider_identify;
623 provider->describe = myspell_provider_describe;
624 provider->free_string_list = myspell_provider_free_string_list;
625 provider->list_dicts = myspell_provider_list_dicts;
626
627 return provider;
628 }
629
630 } // extern C linkage
631