1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <new>
#include <string>
#include <vector>

#include "enchant-provider.h"
#include "sp_spell.h"
#include "ispell_checker.h"
#include "enchant.h"
13
14 #ifndef ENCHANT_ISPELL_HOME_DIR
15 #define ENCHANT_ISPELL_HOME_DIR "ispell"
16 #endif
17
18 ENCHANT_PLUGIN_DECLARE("Ispell")
19
/* Sentinel value used in this file to mark a conversion descriptor that is
 * not open.  The expansion is fully parenthesized so the cast cannot bind to
 * neighbouring operators at the point of use. */
#define G_ICONV_INVALID ((GIConv)-1)
21
22 /***************************************************************************/
23
/* One row of the language lookup table: associates a language tag with the
 * ispell hash file that serves it and the character encoding used for that
 * hash file.  Field order matters: the table below uses positional
 * initializers. */
typedef struct str_ispell_map
{
	const char * lang;	/* language tag, compared with strcmp() by the lookups */
	const char * dict;	/* hash file name, searched for in the dictionary dirs */
	const char * enc;	/* encoding name handed to g_iconv_open() */
} IspellMap;
30
/* Static table of the languages this provider knows about.  Each row is
 * {language tag, hash file name, encoding}.  Lookups in this file walk the
 * table front to back and compare the tag with strcmp(), so matching is
 * exact and case-sensitive.  Several tags may share one hash file
 * (e.g. "en_PH" and "en_US", or "nb" and "no"). */
static const IspellMap ispell_map [] = {
{"ca" ,"catala.hash" ,"iso-8859-1" },
{"cs" ,"czech.hash" ,"iso-8859-2" },
{"da" ,"dansk.hash" ,"iso-8859-1" },
{"de" ,"deutsch.hash" ,"iso-8859-1" },
{"de_CH" ,"swiss.hash" ,"iso-8859-1" },
{"el" ,"ellhnika.hash" ,"iso-8859-7" },
{"en" ,"british.hash" ,"iso-8859-1" },
{"en_PH" ,"american.hash" ,"iso-8859-1" },
{"en_US" ,"american.hash" ,"iso-8859-1" },
{"eo" ,"esperanto.hash" ,"iso-8859-3" },
{"es" ,"espanol.hash" ,"iso-8859-1" },
{"fi" ,"finnish.hash" ,"iso-8859-1" },
{"fr" ,"francais.hash" ,"iso-8859-1" },
{"hu" ,"hungarian.hash" ,"iso-8859-2" },
{"ga" ,"irish.hash" ,"iso-8859-1" },
{"gl" ,"galician.hash" ,"iso-8859-1" },
{"ia" ,"interlingua.hash" ,"iso-8859-1" },
{"it" ,"italian.hash" ,"iso-8859-1" },
{"la" ,"mlatin.hash" ,"iso-8859-1" },
{"lt" ,"lietuviu.hash" ,"iso-8859-13" },
{"nl" ,"nederlands.hash" ,"iso-8859-1" },
{"nb" ,"norsk.hash" ,"iso-8859-1" },
{"nn" ,"nynorsk.hash" ,"iso-8859-1" },
{"no" ,"norsk.hash" ,"iso-8859-1" },
{"pl" ,"polish.hash" ,"iso-8859-2" },
{"pt" ,"brazilian.hash" ,"iso-8859-1" },
{"pt_PT" ,"portugues.hash" ,"iso-8859-1" },
{"ru" ,"russian.hash" ,"koi8-r" },
{"sc" ,"sardinian.hash" ,"iso-8859-1" },
{"sk" ,"slovak.hash" ,"iso-8859-2" },
{"sl" ,"slovensko.hash" ,"iso-8859-2" },
{"sv" ,"svenska.hash" ,"iso-8859-1" },
{"uk" ,"ukrainian.hash" ,"koi8-u" },
{"yi" ,"yiddish-yivo.hash" ,"utf-8" }
};

/* Number of rows in ispell_map, used as the loop bound by every lookup. */
static const size_t size_ispell_map = G_N_ELEMENTS(ispell_map);
69
70 static bool
g_iconv_is_valid(GIConv i)71 g_iconv_is_valid(GIConv i)
72 {
73 return (i != G_ICONV_INVALID);
74 }
75
76 void
try_autodetect_charset(const char * const inEncoding)77 ISpellChecker::try_autodetect_charset(const char * const inEncoding)
78 {
79 if (inEncoding && strlen(inEncoding))
80 {
81 m_translate_in = g_iconv_open(inEncoding, "UTF-8");
82 m_translate_out = g_iconv_open("UTF-8", inEncoding);
83 }
84 }
85
86 /***************************************************************************/
87 /***************************************************************************/
88
/* Construct a checker bound to the given broker with no dictionary loaded:
 * every pointer member starts out NULL, both conversion descriptors start
 * out as G_ICONV_INVALID, m_bSuccessfulInit is false and the two flag index
 * tables are zero-filled.  A dictionary is attached later through
 * requestDictionary(). */
ISpellChecker::ISpellChecker(EnchantBroker * broker)
: m_broker(broker),
deftflag(-1),		/* negative: no default formatter chosen yet */
prefstringchar(-1),	/* negative: no preferred string type found yet */
m_bSuccessfulInit(false),
m_BC(NULL),
m_cd(NULL),
m_cl(NULL),
m_cm(NULL),
m_ho(NULL),
m_nd(NULL),
m_so(NULL),
m_se(NULL),
m_ti(NULL),
m_te(NULL),
m_hashstrings(NULL),
m_hashtbl(NULL),
m_pflaglist(NULL),
m_sflaglist(NULL),
m_chartypes(NULL),
m_infile(NULL),
m_outfile(NULL),
m_askfilename(NULL),
m_Trynum(0),
m_translate_in(G_ICONV_INVALID),
m_translate_out(G_ICONV_INVALID)
{
/* Start with empty suffix/prefix flag index tables. */
memset(m_sflagindex,0,sizeof(m_sflagindex));
memset(m_pflagindex,0,sizeof(m_pflagindex));
}
119
/* Release a malloc()ed pointer.  free(NULL) is defined to be a no-op, so no
 * NULL guard is needed, and the argument is evaluated exactly once. */
#ifndef FREEP
#define FREEP(p) do { free(p); } while (0)
#endif
123
~ISpellChecker()124 ISpellChecker::~ISpellChecker()
125 {
126 if (m_bSuccessfulInit) {
127 // only cleanup our mess if we were successfully initialized
128
129 clearindex (m_pflagindex);
130 clearindex (m_sflagindex);
131 }
132
133 FREEP(m_hashtbl);
134 FREEP(m_hashstrings);
135 FREEP(m_sflaglist);
136 FREEP(m_chartypes);
137
138 if (g_iconv_is_valid (m_translate_in ))
139 g_iconv_close(m_translate_in);
140 m_translate_in = G_ICONV_INVALID;
141 if (g_iconv_is_valid(m_translate_out))
142 g_iconv_close(m_translate_out);
143 m_translate_out = G_ICONV_INVALID;
144 }
145
146 bool
checkWord(const char * const utf8Word,size_t length)147 ISpellChecker::checkWord(const char * const utf8Word, size_t length)
148 {
149 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
150 char szWord[INPUTWORDLEN + MAXAFFIXLEN];
151
152 if (!m_bSuccessfulInit)
153 return false;
154
155 if (!utf8Word || length >= (INPUTWORDLEN + MAXAFFIXLEN) || length == 0)
156 return false;
157
158 bool retVal = false;
159
160 if (!g_iconv_is_valid(m_translate_in))
161 return false;
162 else
163 {
164 /* convert to 8bit string and null terminate */
165 size_t len_in, len_out, result;
166 // the 8bit encodings use precomposed forms
167 char *normalizedWord = g_utf8_normalize (utf8Word, length, G_NORMALIZE_NFC);
168 char *In = normalizedWord;
169 char *Out = szWord;
170
171 len_in = strlen(In);
172 len_out = sizeof( szWord ) - 1;
173 result = g_iconv(m_translate_in, &In, &len_in, &Out, &len_out);
174 g_free(normalizedWord);
175 if ((size_t)-1 == result)
176 return false;
177 *Out = '\0';
178 }
179
180 if (!strtoichar(iWord, szWord, sizeof(iWord), 0))
181 {
182 if (good(iWord, 0, 0, 1, 0) == 1 ||
183 compoundgood(iWord, 1) == 1)
184 {
185 retVal = true;
186 }
187 }
188
189 return retVal;
190 }
191
192 char **
suggestWord(const char * const utf8Word,size_t length,size_t * out_n_suggestions)193 ISpellChecker::suggestWord(const char * const utf8Word, size_t length,
194 size_t * out_n_suggestions)
195 {
196 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
197 char word8[INPUTWORDLEN + MAXAFFIXLEN];
198 int c;
199
200 *out_n_suggestions = 0;
201
202 if (!m_bSuccessfulInit)
203 return NULL;
204 if (!utf8Word || length >= (INPUTWORDLEN + MAXAFFIXLEN) || length == 0)
205 return NULL;
206
207 if (!g_iconv_is_valid(m_translate_in))
208 return NULL;
209 else
210 {
211 /* convert to 8bit string and null terminate */
212
213 size_t len_in, len_out, result;
214 // the 8bit encodings use precomposed forms
215 char *normalizedWord = g_utf8_normalize (utf8Word, length, G_NORMALIZE_NFC);
216 char *In = normalizedWord;
217 char *Out = word8;
218 len_in = strlen(In);
219 len_out = sizeof( word8 ) - 1;
220 result = g_iconv(m_translate_in, &In, &len_in, &Out, &len_out);
221 g_free(normalizedWord);
222 if ((size_t)-1 == result)
223 return NULL;
224 *Out = '\0';
225 }
226
227 if (!strtoichar(iWord, word8, sizeof(iWord), 0))
228 makepossibilities(iWord);
229 else
230 return NULL;
231
232 char **sugg_arr = NULL;
233 *out_n_suggestions = m_pcount;
234
235 {
236 sugg_arr = g_new0 (char *, *out_n_suggestions + 1);
237
238 for (c = 0; c < m_pcount; c++)
239 {
240 int l = strlen(m_possibilities[c]);
241
242 char *utf8Sugg = g_new0(char, INPUTWORDLEN + MAXAFFIXLEN + 1);
243
244 if (!g_iconv_is_valid(m_translate_out))
245 {
246 /* copy to 8bit string and null terminate */
247 for (int x = 0; x < l; x++)
248 utf8Sugg[x] = static_cast<unsigned char>(m_possibilities[c][x]);
249 utf8Sugg[l] = 0;
250 }
251 else
252 {
253 /* convert to 32bit string and null terminate */
254
255 size_t len_in, len_out;
256 char *In = m_possibilities[c];
257 char *Out = reinterpret_cast<char *>(utf8Sugg);
258
259 len_in = l;
260 len_out = INPUTWORDLEN + MAXAFFIXLEN;
261 if ((size_t)-1 == g_iconv(m_translate_out, &In, &len_in, &Out, &len_out)) {
262 *out_n_suggestions = c;
263 return sugg_arr;
264 }
265 *(Out) = 0;
266 }
267
268 sugg_arr[c] = utf8Sugg;
269 }
270 }
271
272 return sugg_arr;
273 }
274
275 static GSList *
ispell_checker_get_dictionary_dirs(EnchantBroker * broker)276 ispell_checker_get_dictionary_dirs (EnchantBroker * broker)
277 {
278 GSList *dirs = NULL;
279
280 {
281 GSList *config_dirs, *iter;
282
283 config_dirs = enchant_get_user_config_dirs ();
284
285 for (iter = config_dirs; iter; iter = iter->next)
286 {
287 dirs = g_slist_append (dirs, g_build_filename ((const gchar *)iter->data,
288 ENCHANT_ISPELL_HOME_DIR, NULL));
289 }
290
291 g_slist_foreach (config_dirs, (GFunc)g_free, NULL);
292 g_slist_free (config_dirs);
293 }
294
295 #if 0
296 {
297 const gchar* const * system_data_dirs = g_get_system_data_dirs ();
298 const gchar* const * iter;
299
300 for (iter = system_data_dirs; *iter; iter++)
301 {
302 dirs = g_slist_append (dirs, g_build_filename (*iter, "ispell", "dicts", NULL));
303 }
304 }
305 #endif
306
307 /* until I work out how to link the modules against enchant in MacOSX - fjf
308 */
309 #ifndef XP_TARGET_COCOA
310 char * ispell_prefix = NULL;
311
312 /* Look for explicitly set registry values */
313 ispell_prefix = enchant_get_registry_value ("Ispell", "Data_Dir");
314 if (ispell_prefix)
315 dirs = g_slist_append (dirs, ispell_prefix);
316
317 /* Dynamically locate library and search for modules relative to it. */
318 char * enchant_prefix = enchant_get_prefix_dir();
319 if(enchant_prefix)
320 {
321 ispell_prefix = g_build_filename(enchant_prefix, "share", "enchant", "ispell", NULL);
322 g_free(enchant_prefix);
323 dirs = g_slist_append (dirs, ispell_prefix);
324 }
325 #endif
326
327 #ifdef ENCHANT_ISPELL_DICT_DIR
328 dirs = g_slist_append (dirs, g_strdup (ENCHANT_ISPELL_DICT_DIR));
329 #endif
330
331 {
332 GSList *config_dirs, *iter;
333
334 config_dirs = enchant_get_dirs_from_param (broker, "enchant.ispell.dictionary.path");
335
336 for (iter = config_dirs; iter; iter = iter->next)
337 {
338 dirs = g_slist_append (dirs, g_strdup ((const gchar *)iter->data));
339 }
340
341 g_slist_foreach (config_dirs, (GFunc)g_free, NULL);
342 g_slist_free (config_dirs);
343 }
344
345 return dirs;
346 }
347
348 static void
s_buildHashNames(std::vector<std::string> & names,EnchantBroker * broker,const char * dict)349 s_buildHashNames (std::vector<std::string> & names, EnchantBroker * broker, const char * dict)
350 {
351 names.clear ();
352
353 GSList *dirs, *iter;
354
355 dirs = ispell_checker_get_dictionary_dirs(broker);
356 for (iter = dirs; iter; iter = iter->next)
357 {
358 char *tmp;
359
360 tmp = g_build_filename ((const gchar *)iter->data, dict, NULL);
361 names.push_back (tmp);
362 g_free (tmp);
363 }
364
365 g_slist_foreach (dirs, (GFunc)g_free, NULL);
366 g_slist_free (dirs);
367 }
368
369 char *
loadDictionary(const char * szdict)370 ISpellChecker::loadDictionary (const char * szdict)
371 {
372 std::vector<std::string> dict_names;
373
374 s_buildHashNames (dict_names, m_broker, szdict);
375
376 for (size_t i = 0; i < dict_names.size(); i++)
377 {
378 if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
379 return g_strdup (dict_names[i].c_str());
380 }
381
382 return NULL;
383 }
384
385 /*!
386 * Load ispell dictionary hash file for given language.
387 *
388 * \param szLang - The language tag ("en-US") we want to use
389 * \return The name of the dictionary file
390 */
391 bool
loadDictionaryForLanguage(const char * szLang)392 ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
393 {
394 char *hashname = NULL;
395
396 const char * encoding = NULL;
397 const char * szFile = NULL;
398
399 for (size_t i = 0; i < size_ispell_map; i++)
400 {
401 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
402 if (!strcmp (szLang, mapping->lang))
403 {
404 szFile = mapping->dict;
405 encoding = mapping->enc;
406 break;
407 }
408 }
409
410 if (!szFile || !strlen(szFile))
411 return false;
412
413 alloc_ispell_struct();
414
415 if (!(hashname = loadDictionary(szFile)))
416 return false;
417
418 // one of the two above calls succeeded
419 setDictionaryEncoding (hashname, encoding);
420 g_free (hashname);
421
422 return true;
423 }
424
425 void
setDictionaryEncoding(const char * hashname,const char * encoding)426 ISpellChecker::setDictionaryEncoding( const char * hashname, const char * encoding )
427 {
428 /* Get Hash encoding from XML file. This should always work! */
429 try_autodetect_charset(encoding);
430
431 if (g_iconv_is_valid(m_translate_in) && g_iconv_is_valid(m_translate_out))
432 {
433 /* We still have to setup prefstringchar*/
434 prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
435 : static_cast<int *>(NULL));
436
437 if (prefstringchar < 0)
438 {
439 char teststring[64];
440 for(int n1 = 1; n1 <= 15; n1++)
441 {
442 sprintf(teststring, "latin%d", n1);
443 prefstringchar = findfiletype(teststring, 1,
444 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
445 if (prefstringchar >= 0)
446 break;
447 }
448 }
449
450 return; /* success */
451 }
452
453 /* Test for UTF-8 first */
454 prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
455 if (prefstringchar >= 0)
456 {
457 m_translate_in = g_iconv_open("UTF-8", "UTF-8");
458 m_translate_out = g_iconv_open("UTF-8", "UTF-8");
459 }
460
461 if (g_iconv_is_valid(m_translate_in) && g_iconv_is_valid(m_translate_out))
462 return; /* success */
463
464 /* Test for "latinN" */
465 if (!g_iconv_is_valid(m_translate_in))
466 {
467 /* Look for "altstringtype" names from latin1 to latin15 */
468 for(int n1 = 1; n1 <= 15; n1++)
469 {
470 char * teststring = g_strdup_printf("latin%u", n1);
471 prefstringchar = findfiletype(teststring, 1,
472 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
473 if (prefstringchar >= 0)
474 {
475 m_translate_in = g_iconv_open(teststring, "UTF-8");
476 m_translate_out = g_iconv_open("UTF-8", teststring);
477 g_free (teststring);
478 break;
479 }
480 else
481 {
482 g_free (teststring);
483 }
484 }
485 }
486
487 /* If nothing found, use latin1 */
488 if (!g_iconv_is_valid(m_translate_in))
489 {
490 m_translate_in = g_iconv_open("latin1", "UTF-8");
491 m_translate_out = g_iconv_open("UTF-8", "latin1");
492 }
493 }
494
495 bool
requestDictionary(const char * szLang)496 ISpellChecker::requestDictionary(const char *szLang)
497 {
498 if (!loadDictionaryForLanguage (szLang))
499 {
500 // handle a shortened version of the language tag: en_US => en
501 std::string shortened_dict (szLang);
502 size_t uscore_pos;
503
504 if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) {
505 shortened_dict = shortened_dict.substr(0, uscore_pos);
506 if (!loadDictionaryForLanguage (shortened_dict.c_str()))
507 return false;
508 } else
509 return false;
510 }
511
512 m_bSuccessfulInit = true;
513
514 if (prefstringchar < 0)
515 m_defdupchar = 0;
516 else
517 m_defdupchar = prefstringchar;
518
519 return true;
520 }
521
522 static char **
ispell_dict_suggest(EnchantDict * me,const char * const word,size_t len,size_t * out_n_suggs)523 ispell_dict_suggest (EnchantDict * me, const char *const word,
524 size_t len, size_t * out_n_suggs)
525 {
526 ISpellChecker * checker;
527
528 checker = (ISpellChecker *) me->user_data;
529 return checker->suggestWord (word, len, out_n_suggs);
530 }
531
532 static int
ispell_dict_check(EnchantDict * me,const char * const word,size_t len)533 ispell_dict_check (EnchantDict * me, const char *const word, size_t len)
534 {
535 ISpellChecker * checker;
536
537 checker = (ISpellChecker *) me->user_data;
538
539 if (checker->checkWord(word, len))
540 return 0;
541
542 return 1;
543 }
544
545 static EnchantDict *
ispell_provider_request_dict(EnchantProvider * me,const char * const tag)546 ispell_provider_request_dict (EnchantProvider * me, const char *const tag)
547 {
548 EnchantDict *dict;
549 ISpellChecker * checker;
550
551 checker = new ISpellChecker (me->owner);
552
553 if (!checker)
554 {
555 return NULL;
556 }
557
558 if (!checker->requestDictionary(tag)) {
559 delete checker;
560 return NULL;
561 }
562
563 dict = g_new0 (EnchantDict, 1);
564 dict->user_data = (void *) checker;
565 dict->check = ispell_dict_check;
566 dict->suggest = ispell_dict_suggest;
567 // don't implement session or personal
568
569 return dict;
570 }
571
572 static void
ispell_provider_dispose_dict(EnchantProvider * me,EnchantDict * dict)573 ispell_provider_dispose_dict (EnchantProvider * me, EnchantDict * dict)
574 {
575 ISpellChecker * checker;
576
577 checker = (ISpellChecker *) dict->user_data;
578 delete checker;
579
580 g_free (dict);
581 }
582
583 static int
_ispell_provider_dictionary_exists(EnchantBroker * broker,const char * const szFile)584 _ispell_provider_dictionary_exists (EnchantBroker * broker, const char *const szFile)
585 {
586 std::vector <std::string> names;
587
588 s_buildHashNames (names, broker, szFile);
589 for (size_t i = 0; i < names.size(); i++) {
590 if (g_file_test (names[i].c_str(), G_FILE_TEST_EXISTS))
591 return 1;
592 }
593
594 return 0;
595 }
596
597 extern "C" {
598
599 ENCHANT_MODULE_EXPORT (EnchantProvider *)
600 init_enchant_provider (void);
601
602 static char **
ispell_provider_list_dictionaries(EnchantProvider * me,size_t * out_n_dicts)603 ispell_provider_list_dictionaries (EnchantProvider * me,
604 size_t * out_n_dicts)
605 {
606 size_t i, nb;
607 char ** out_dicts = g_new0 (char *, size_ispell_map + 1);
608
609 (void)me;
610
611 nb = 0;
612 for (i = 0; i < size_ispell_map; i++)
613 if (_ispell_provider_dictionary_exists (me->owner, ispell_map[i].dict))
614 out_dicts[nb++] = g_strdup (ispell_map[i].lang);
615
616 *out_n_dicts = nb;
617 if (nb == 0) {
618 g_free (out_dicts);
619 out_dicts = NULL;
620 }
621
622 return out_dicts;
623 }
624
625 static int
ispell_provider_dictionary_exists(struct str_enchant_provider * me,const char * const tag)626 ispell_provider_dictionary_exists (struct str_enchant_provider * me,
627 const char *const tag)
628 {
629 std::string shortened_dict (tag);
630 size_t uscore_pos;
631 if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1))
632 shortened_dict = shortened_dict.substr(0, uscore_pos);
633
634 for (size_t i = 0; i < size_ispell_map; i++)
635 {
636 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
637 if (!strcmp (tag, mapping->lang) || !strcmp (shortened_dict.c_str(), mapping->lang))
638 return _ispell_provider_dictionary_exists(me->owner, mapping->dict);
639 }
640
641 return 0;
642 }
643
644 static void
ispell_provider_free_string_list(EnchantProvider * me,char ** str_list)645 ispell_provider_free_string_list (EnchantProvider * me, char **str_list)
646 {
647 g_strfreev (str_list);
648 }
649
650 static void
ispell_provider_dispose(EnchantProvider * me)651 ispell_provider_dispose (EnchantProvider * me)
652 {
653 g_free (me);
654 }
655
656 static const char *
ispell_provider_identify(EnchantProvider * me)657 ispell_provider_identify (EnchantProvider * me)
658 {
659 return "ispell";
660 }
661
662 static const char *
ispell_provider_describe(EnchantProvider * me)663 ispell_provider_describe (EnchantProvider * me)
664 {
665 return "Ispell Provider";
666 }
667
668 EnchantProvider *
init_enchant_provider(void)669 init_enchant_provider (void)
670 {
671 EnchantProvider *provider;
672
673 provider = g_new0 (EnchantProvider, 1);
674 provider->dispose = ispell_provider_dispose;
675 provider->request_dict = ispell_provider_request_dict;
676 provider->dispose_dict = ispell_provider_dispose_dict;
677 provider->dictionary_exists = ispell_provider_dictionary_exists;
678 provider->identify = ispell_provider_identify;
679 provider->describe = ispell_provider_describe;
680 provider->list_dicts = ispell_provider_list_dictionaries;
681 provider->free_string_list = ispell_provider_free_string_list;
682
683 return provider;
684 }
685
686 } // extern C linkage
687