1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* enchant
3  * Copyright (C) 2003-2004 Joan Moratinos <jmo@softcatala.org>, Dom Lachowicz
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02110-1301, USA.
19  *
20  * In addition, as a special exception, Dom Lachowicz
21  * gives permission to link the code of this program with
22  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
23  * spell checker backend) and distribute linked combinations including
24  * the two.  You must obey the GNU General Public License in all
25  * respects for all of the code used other than said providers.  If you modify
26  * this file, you may extend this exception to your version of the
27  * file, but you are not obligated to do so.  If you do not wish to
28  * do so, delete this exception statement from your version.
29  */
30 
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <new>
#include <string>
#include <vector>
37 
38 #include "enchant.h"
39 #include "enchant-provider.h"
40 
41 #ifdef near
42 #undef near
43 #endif
44 
45 /* built against hunspell 1.2.2 on 2008-04-12 */
46 
47 #ifdef HUNSPELL_STATIC
48 #include "hunspell.hxx"
49 #else
50 #include <hunspell/hunspell.hxx>
51 #endif
52 
53 ENCHANT_PLUGIN_DECLARE("Myspell")
54 
/* Sentinel meaning "no usable converter"; the converter members start out
 * with this value and g_iconv_is_valid() compares against it.  The expansion
 * is fully parenthesized so the cast cannot bind to neighbouring tokens when
 * the macro is used inside a larger expression. */
#define G_ICONV_INVALID ((GIConv)-1)
56 
57 #include <glib.h>
58 
59 /***************************************************************************/
60 
/*
 * Wraps one Hunspell dictionary together with the pair of iconv converters
 * used to move words between UTF-8 and the encoding the dictionary reports.
 */
class MySpellChecker
{
public:
	/* Records the broker; no dictionary is loaded until requestDictionary(). */
	MySpellChecker(EnchantBroker * broker);
	/* Deletes the Hunspell instance and closes whichever converters are open. */
	~MySpellChecker();

	/* Returns true when the UTF-8 word (len bytes) is accepted by the dictionary. */
	bool checkWord (const char *word, size_t len);
	/* Returns a NULL-terminated array of UTF-8 suggestions and stores their
	   count in *out_n_suggs; returns NULL when there is nothing to suggest
	   or the word could not be processed. */
	char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs);

	/* Locates and loads the dictionary for szLang; returns false if none was found. */
	bool requestDictionary (const char * szLang);

private:
	GIConv  m_translate_in; /* converts UTF-8 into the dictionary encoding */
	GIConv  m_translate_out; /* converts the dictionary encoding into UTF-8 */
	Hunspell *myspell; /* stays NULL until requestDictionary() creates it */
	EnchantBroker *m_broker; /* handed to the dictionary directory lookup */
};
78 
79 /***************************************************************************/
80 
81 #if defined(_WIN32)
GetRegistryValue(HKEY baseKey,const WCHAR * uKeyName,const WCHAR * uKey)82 static WCHAR* GetRegistryValue(HKEY baseKey, const WCHAR * uKeyName, const WCHAR * uKey)
83 {
84   	HKEY hKey;
85 	unsigned long lType;
86 	DWORD dwSize;
87 	WCHAR* wszValue = NULL;
88 
89 	if(RegOpenKeyExW(baseKey, uKeyName, 0, KEY_READ, &hKey) == ERROR_SUCCESS)
90 		{
91 			/* Determine size of string */
92 			if(RegQueryValueExW( hKey, uKey, NULL, &lType, NULL, &dwSize) == ERROR_SUCCESS)
93 				{
94 					wszValue = g_new0(WCHAR, dwSize + 1);
95 					RegQueryValueExW(hKey, uKey, NULL, &lType, (LPBYTE) wszValue, &dwSize);
96 				}
97 		}
98 
99 	return wszValue;
100 }
101 
102 static char *
myspell_checker_get_open_office_dicts_dir(void)103 myspell_checker_get_open_office_dicts_dir(void)
104 {
105     WCHAR* wszDirectory;
106     char* open_office_dir, * open_office_dicts_dir;
107 
108     /*start by trying current user*/
109     wszDirectory = GetRegistryValue (HKEY_CURRENT_USER, L"Software\\Microsoft\\Windows\\CurrentVersion\\App Paths\\soffice.exe", L"Path");
110     if(wszDirectory == NULL)
111     {
112         /*next try local machine*/
113         wszDirectory = GetRegistryValue (HKEY_LOCAL_MACHINE, L"Software\\Microsoft\\Windows\\CurrentVersion\\App Paths\\soffice.exe", L"Path");
114     }
115 
116     if(wszDirectory == NULL)
117     {
118         return NULL;
119     }
120 
121     else {
122        	open_office_dir = g_utf16_to_utf8 ((gunichar2*)wszDirectory, -1, NULL, NULL, NULL);
123 		open_office_dicts_dir = g_build_filename(open_office_dir, "share", "dict", "ooo", NULL);
124         g_free(wszDirectory);
125         g_free(open_office_dir);
126         return open_office_dicts_dir;
127     }
128 }
129 #endif
130 
131 static bool
g_iconv_is_valid(GIConv i)132 g_iconv_is_valid(GIConv i)
133 {
134 	return (i != G_ICONV_INVALID);
135 }
136 
MySpellChecker(EnchantBroker * broker)137 MySpellChecker::MySpellChecker(EnchantBroker * broker)
138 : m_translate_in(G_ICONV_INVALID), m_translate_out(G_ICONV_INVALID), myspell(0), m_broker(broker)
139 {
140 }
141 
~MySpellChecker()142 MySpellChecker::~MySpellChecker()
143 {
144 	delete myspell;
145 	if (g_iconv_is_valid (m_translate_in ))
146 		g_iconv_close(m_translate_in);
147 	if (g_iconv_is_valid(m_translate_out))
148 		g_iconv_close(m_translate_out);
149 }
150 
151 bool
checkWord(const char * utf8Word,size_t len)152 MySpellChecker::checkWord(const char *utf8Word, size_t len)
153 {
154 	if (len > MAXWORDLEN || !g_iconv_is_valid(m_translate_in))
155 		return false;
156 
157 	// the 8bit encodings use precomposed forms
158 	char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
159 	char *in = normalizedWord;
160 	char word8[MAXWORDLEN + 1];
161 	char *out = word8;
162 	size_t len_in = strlen(in);
163 	size_t len_out = sizeof( word8 ) - 1;
164 	size_t result = g_iconv(m_translate_in, &in, &len_in, &out, &len_out);
165 	g_free(normalizedWord);
166 	if ((size_t)-1 == result)
167 		return false;
168 	*out = '\0';
169 	if (myspell->spell(word8))
170 		return true;
171 	else
172 		return false;
173 }
174 
175 char**
suggestWord(const char * const utf8Word,size_t len,size_t * nsug)176 MySpellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsug)
177 {
178 	if (len > MAXWORDLEN
179 		|| !g_iconv_is_valid(m_translate_in)
180 		|| !g_iconv_is_valid(m_translate_out))
181 		return 0;
182 
183 	// the 8bit encodings use precomposed forms
184 	char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
185 	char *in = normalizedWord;
186 	char word8[MAXWORDLEN + 1];
187 	char *out = word8;
188 	size_t len_in = strlen(in);
189 	size_t len_out = sizeof(word8) - 1;
190 	size_t result = g_iconv(m_translate_in, &in, &len_in, &out, &len_out);
191 	g_free(normalizedWord);
192 	if ((size_t)-1 == result)
193 		return NULL;
194 
195 	*out = '\0';
196 	char **sugMS;
197 	*nsug = myspell->suggest(&sugMS, word8);
198 	if (*nsug > 0) {
199 		char **sug = g_new0 (char *, *nsug + 1);
200 		for (size_t i=0; i<*nsug; i++) {
201 			in = sugMS[i];
202 			len_in = strlen(in);
203 			len_out = MAXWORDLEN;
204 			char *word = g_new0(char, len_out + 1);
205 			out = reinterpret_cast<char *>(word);
206 			if ((size_t)-1 == g_iconv(m_translate_out, &in, &len_in, &out, &len_out)) {
207 				for (size_t j = i; j < *nsug; j++)
208 					free(sugMS[j]);
209 				free(sugMS);
210 
211 				*nsug = i;
212 				return sug;
213 			}
214 			*(out) = 0;
215 			sug[i] = word;
216 			free(sugMS[i]);
217 		}
218 		free(sugMS);
219 		return sug;
220 	}
221 	else
222 		return 0;
223 }
224 
225 static GSList *
myspell_checker_get_dictionary_dirs(EnchantBroker * broker)226 myspell_checker_get_dictionary_dirs (EnchantBroker * broker)
227 {
228 	GSList *dirs = NULL;
229 
230 	{
231 		GSList *config_dirs, *iter;
232 
233 		config_dirs = enchant_get_user_config_dirs ();
234 
235 		for (iter = config_dirs; iter; iter = iter->next)
236 			{
237 				dirs = g_slist_append (dirs, g_build_filename ((const gchar *)iter->data,
238 									       "myspell", NULL));
239 			}
240 
241 		g_slist_foreach (config_dirs, (GFunc)g_free, NULL);
242 		g_slist_free (config_dirs);
243 	}
244 
245 	{
246 		const gchar* const * system_data_dirs = g_get_system_data_dirs ();
247 		const gchar* const * iter;
248 
249 		for (iter = system_data_dirs; *iter; iter++)
250 			{
251 				dirs = g_slist_append (dirs, g_build_filename (*iter, "myspell", "dicts", NULL));
252 			}
253 	}
254 
255 	/* until I work out how to link the modules against enchant in MacOSX - fjf
256 	 */
257 #ifndef XP_TARGET_COCOA
258 	char * myspell_prefix = NULL;
259 
260 	/* Look for explicitly set registry values */
261 	myspell_prefix = enchant_get_registry_value ("Myspell", "Data_Dir");
262 	if (myspell_prefix)
263 		dirs = g_slist_append (dirs, myspell_prefix);
264 
265 	/* Dynamically locate library and search for modules relative to it. */
266 	char * enchant_prefix = enchant_get_prefix_dir();
267 	if(enchant_prefix)
268 		{
269 			myspell_prefix = g_build_filename(enchant_prefix, "share", "enchant", "myspell", NULL);
270 			g_free(enchant_prefix);
271 			dirs = g_slist_append (dirs, myspell_prefix);
272 		}
273 #endif
274 
275 #ifdef ENCHANT_MYSPELL_DICT_DIR
276 	dirs = g_slist_append (dirs, g_strdup (ENCHANT_MYSPELL_DICT_DIR));
277 #endif
278 
279 #if defined(_WIN32)
280 	char* open_office_dicts_dir = myspell_checker_get_open_office_dicts_dir ();
281 	if (open_office_dicts_dir)
282         {
283 		dirs = g_slist_append (dirs, open_office_dicts_dir);
284 	}
285 #endif
286 
287 	{
288 		GSList *config_dirs, *iter;
289 
290 		config_dirs = enchant_get_dirs_from_param (broker, "enchant.myspell.dictionary.path");
291 
292 		for (iter = config_dirs; iter; iter = iter->next)
293 			{
294 				dirs = g_slist_append (dirs, g_strdup ((const gchar *)iter->data));
295 			}
296 
297 		g_slist_foreach (config_dirs, (GFunc)g_free, NULL);
298 		g_slist_free (config_dirs);
299 	}
300 
301 	return dirs;
302 }
303 
304 static void
s_buildDictionaryDirs(std::vector<std::string> & dirs,EnchantBroker * broker)305 s_buildDictionaryDirs (std::vector<std::string> & dirs, EnchantBroker * broker)
306 {
307 	GSList *myspell_dirs, *iter;
308 
309 	dirs.clear ();
310 
311 	myspell_dirs = myspell_checker_get_dictionary_dirs (broker);
312 	for (iter = myspell_dirs; iter; iter = iter->next)
313 		{
314 			dirs.push_back ((const char *)iter->data);
315 		}
316 
317 	g_slist_foreach (myspell_dirs, (GFunc)g_free, NULL);
318 	g_slist_free (myspell_dirs);
319 }
320 
321 static void
s_buildHashNames(std::vector<std::string> & names,EnchantBroker * broker,const char * dict)322 s_buildHashNames (std::vector<std::string> & names, EnchantBroker * broker, const char * dict)
323 {
324 	names.clear ();
325 
326 	std::vector<std::string> dirs;
327 	s_buildDictionaryDirs (dirs, broker);
328 
329 	char *dict_dic = g_strconcat(dict, ".dic", NULL);
330 	for (size_t i = 0; i < dirs.size(); i++)
331 		{
332 			char *tmp = g_build_filename (dirs[i].c_str(), dict_dic, NULL);
333 			names.push_back (tmp);
334 			g_free (tmp);
335 		}
336 
337 	g_free(dict_dic);
338 }
339 
340 static bool
s_hasCorrespondingAffFile(const std::string & dicFile)341 s_hasCorrespondingAffFile(const std::string & dicFile)
342 {
343     std::string aff = dicFile;
344     aff.replace(aff.end()-3,aff.end(), "aff");
345     return g_file_test(aff.c_str(), G_FILE_TEST_EXISTS) != 0;
346 }
347 
/*
 * Decides whether the directory entry `dir_entry` could be a dictionary for
 * the language tag `tag`: it must end in ".dic", begin with the tag, and the
 * character right after the tag must be punctuation.
 *
 * Returns true for a plausible match, false otherwise (including entries
 * that are too short to hold both the tag and the ".dic" suffix).
 */
static bool is_plausible_dict_for_tag(const char *dir_entry, const char *tag)
{
    const char *dic_suffix = ".dic";
    size_t dic_suffix_len = strlen(dic_suffix);
    size_t dir_entry_len = strlen(dir_entry);
    size_t tag_len = strlen(tag);

    /* Compare with an addition rather than a subtraction: the lengths are
       unsigned, so "entry - suffix" would wrap around for entries shorter
       than the suffix and let the strcmp below read before the string. */
    if (dir_entry_len < dic_suffix_len + tag_len)
        return false;
    if (strcmp(dir_entry+dir_entry_len-dic_suffix_len, dic_suffix) != 0)
        return false;
    if (strncmp (dir_entry, tag, tag_len) != 0)
        return false;
    //e.g. requested dict for "fi",
    //reject "fil_PH.dic"
    //allow "fi-FOO.dic", "fi_FOO.dic", "fi.dic", etc.
    //(the cast keeps bytes >= 0x80 from reaching ispunct as negative values)
    if (!ispunct((unsigned char)dir_entry[tag_len]))
        return false;
    return true;
}
368 
369 static char *
myspell_request_dictionary(EnchantBroker * broker,const char * tag)370 myspell_request_dictionary (EnchantBroker * broker, const char * tag)
371 {
372 	std::vector<std::string> names;
373 
374 	s_buildHashNames (names, broker, tag);
375 
376 	for (size_t i = 0; i < names.size (); i++) {
377 		if (g_file_test(names[i].c_str(), G_FILE_TEST_EXISTS)) {
378 			if(s_hasCorrespondingAffFile(names[i])){
379 				return g_strdup (names[i].c_str());
380 			}
381 		}
382 	}
383 
384 	std::vector<std::string> dirs;
385 	s_buildDictionaryDirs (dirs, broker);
386 
387 	for (size_t i = 0; i < dirs.size(); i++) {
388 		GDir *dir = g_dir_open (dirs[i].c_str(), 0, NULL);
389 		if (dir) {
390 			const char *dir_entry;
391 			while ((dir_entry = g_dir_read_name (dir)) != NULL) {
392 				if (is_plausible_dict_for_tag(dir_entry, tag)) {
393 					char *dict = g_build_filename (dirs[i].c_str(),
394 								       dir_entry, NULL);
395                     if(s_hasCorrespondingAffFile(dict)){
396 			                    g_dir_close (dir);
397 					    return dict;
398                     }
399 				}
400 			}
401 
402 			g_dir_close (dir);
403 		}
404 	}
405 
406 	return NULL;
407 }
408 
409 bool
requestDictionary(const char * szLang)410 MySpellChecker::requestDictionary(const char *szLang)
411 {
412 	char *dic = NULL, *aff = NULL;
413 
414 	dic = myspell_request_dictionary (m_broker, szLang);
415 	if (!dic)
416 		return false;
417 
418 	aff = g_strdup(dic);
419 	int len_dic = strlen(dic);
420 	strcpy(aff+len_dic-3, "aff");
421 	if (g_file_test(aff, G_FILE_TEST_EXISTS))
422 	{
423 		myspell = new Hunspell(aff, dic);
424 	}
425 	g_free(dic);
426 	g_free(aff);
427 	if(myspell == NULL){
428 		return false;
429 	}
430 	const char *enc = myspell->get_dic_encoding();
431 
432 	m_translate_in = g_iconv_open(enc, "UTF-8");
433 	m_translate_out = g_iconv_open("UTF-8", enc);
434 
435 	return true;
436 }
437 
438 /*
439  * Enchant
440  */
441 
442 static char **
myspell_dict_suggest(EnchantDict * me,const char * const word,size_t len,size_t * out_n_suggs)443 myspell_dict_suggest (EnchantDict * me, const char *const word,
444 		     size_t len, size_t * out_n_suggs)
445 {
446 	MySpellChecker * checker;
447 
448 	checker = (MySpellChecker *) me->user_data;
449 	return checker->suggestWord (word, len, out_n_suggs);
450 }
451 
452 static int
myspell_dict_check(EnchantDict * me,const char * const word,size_t len)453 myspell_dict_check (EnchantDict * me, const char *const word, size_t len)
454 {
455 	MySpellChecker * checker;
456 
457 	checker = (MySpellChecker *) me->user_data;
458 
459 	if (checker->checkWord(word, len))
460 		return 0;
461 
462 	return 1;
463 }
464 
465 static void
myspell_provider_enum_dicts(const char * const directory,std::vector<std::string> & out_dicts)466 myspell_provider_enum_dicts (const char * const directory,
467 			     std::vector<std::string> & out_dicts)
468 {
469 	GDir * dir = g_dir_open (directory, 0, NULL);
470 	if (dir) {
471 		const char * entry;
472 
473 		while ((entry = g_dir_read_name (dir)) != NULL) {
474 			char * utf8_entry = g_filename_to_utf8 (entry, -1, NULL, NULL, NULL);
475 			if (utf8_entry) {
476 				std::string dir_entry (utf8_entry);
477 				g_free (utf8_entry);
478 
479 				int hit = dir_entry.rfind (".dic");
480 				if (hit != -1) {
481 					/* don't include hyphenation dictionaries
482 					   and require .aff file to be present*/
483 					if(dir_entry.compare (0, 5, "hyph_") != 0)
484 					{
485 						std::string name(dir_entry.substr (0, hit));
486 						std::string affFileName(name + ".aff");
487 						char * aff = g_build_filename(directory, affFileName.c_str(), NULL);
488 						if (g_file_test(aff, G_FILE_TEST_EXISTS))
489 						{
490 							out_dicts.push_back (dir_entry.substr (0, hit));
491 						}
492 						g_free(aff);
493 					}
494 				}
495 			}
496 		}
497 
498 		g_dir_close (dir);
499 	}
500 }
501 
502 extern "C" {
503 
/* Forward declaration of the provider entry point defined at the end of
   this file, carrying the ENCHANT_MODULE_EXPORT decoration. */
ENCHANT_MODULE_EXPORT (EnchantProvider *)
	     init_enchant_provider (void);
506 
507 static char **
myspell_provider_list_dicts(EnchantProvider * me,size_t * out_n_dicts)508 myspell_provider_list_dicts (EnchantProvider * me,
509 			    size_t * out_n_dicts)
510 {
511 	std::vector<std::string> dict_dirs, dicts;
512 	char ** dictionary_list = NULL;
513 
514 	s_buildDictionaryDirs (dict_dirs, me->owner);
515 
516 	for (size_t i = 0; i < dict_dirs.size(); i++)
517 		{
518 			myspell_provider_enum_dicts (dict_dirs[i].c_str(), dicts);
519 		}
520 
521 	if (dicts.size () > 0) {
522 		dictionary_list = g_new0 (char *, dicts.size() + 1);
523 
524 		for (size_t i = 0; i < dicts.size(); i++)
525 			dictionary_list[i] = g_strdup (dicts[i].c_str());
526 	}
527 
528 	*out_n_dicts = dicts.size ();
529 	return dictionary_list;
530 }
531 
532 static void
myspell_provider_free_string_list(EnchantProvider * me,char ** str_list)533 myspell_provider_free_string_list (EnchantProvider * me, char **str_list)
534 {
535 	g_strfreev (str_list);
536 }
537 
538 static EnchantDict *
myspell_provider_request_dict(EnchantProvider * me,const char * const tag)539 myspell_provider_request_dict(EnchantProvider * me, const char *const tag)
540 {
541 	EnchantDict *dict;
542 	MySpellChecker * checker;
543 
544 	checker = new MySpellChecker(me->owner);
545 
546 	if (!checker)
547 		return NULL;
548 
549 	if (!checker->requestDictionary(tag)) {
550 		delete checker;
551 		return NULL;
552 	}
553 
554 	dict = g_new0(EnchantDict, 1);
555 	dict->user_data = (void *) checker;
556 	dict->check = myspell_dict_check;
557 	dict->suggest = myspell_dict_suggest;
558 	// don't implement personal, session
559 
560 	return dict;
561 }
562 
563 static void
myspell_provider_dispose_dict(EnchantProvider * me,EnchantDict * dict)564 myspell_provider_dispose_dict (EnchantProvider * me, EnchantDict * dict)
565 {
566 	MySpellChecker *checker;
567 
568 	checker = (MySpellChecker *) dict->user_data;
569 	delete checker;
570 
571 	g_free (dict);
572 }
573 
574 static int
myspell_provider_dictionary_exists(struct str_enchant_provider * me,const char * const tag)575 myspell_provider_dictionary_exists (struct str_enchant_provider * me,
576 				    const char *const tag)
577 {
578 	std::vector <std::string> names;
579 
580 	s_buildHashNames (names, me->owner, tag);
581 	for (size_t i = 0; i < names.size(); i++) {
582 		if (g_file_test (names[i].c_str(), G_FILE_TEST_EXISTS))
583 		{
584 			std::string aff(names[i]);
585 			aff.replace(aff.end() - 3, aff.end(), "aff");
586 			if (g_file_test(aff.c_str(), G_FILE_TEST_EXISTS))
587 				return 1;
588 		}
589 	}
590 
591 	return 0;
592 }
593 
594 static void
myspell_provider_dispose(EnchantProvider * me)595 myspell_provider_dispose (EnchantProvider * me)
596 {
597 	g_free (me);
598 }
599 
600 static const char *
myspell_provider_identify(EnchantProvider * me)601 myspell_provider_identify (EnchantProvider * me)
602 {
603 	return "myspell";
604 }
605 
606 static const char *
myspell_provider_describe(EnchantProvider * me)607 myspell_provider_describe (EnchantProvider * me)
608 {
609 	return "Myspell Provider";
610 }
611 
612 EnchantProvider *
init_enchant_provider(void)613 init_enchant_provider (void)
614 {
615 	EnchantProvider *provider;
616 
617 	provider = g_new0(EnchantProvider, 1);
618 	provider->dispose = myspell_provider_dispose;
619 	provider->request_dict = myspell_provider_request_dict;
620 	provider->dispose_dict = myspell_provider_dispose_dict;
621 	provider->dictionary_exists = myspell_provider_dictionary_exists;
622 	provider->identify = myspell_provider_identify;
623 	provider->describe = myspell_provider_describe;
624 	provider->free_string_list = myspell_provider_free_string_list;
625 	provider->list_dicts = myspell_provider_list_dicts;
626 
627 	return provider;
628 }
629 
630 } // extern C linkage
631