1 /* enchant
2 * Copyright (C) 2020 Sander van Geloven
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 *
19 * In addition, as a special exception, Dom Lachowicz
20 * gives permission to link the code of this program with
21 * non-LGPL Spelling Provider libraries (eg: a MSFT Office
22 * spell checker backend) and distribute linked combinations including
23 * the two. You must obey the GNU General Public License in all
24 * respects for all of the code used other than said providers. If you modify
25 * this file, you may extend this exception to your version of the
26 * file, but you are not obligated to do so. If you do not wish to
27 * do so, delete this exception statement from your version.
28 */
29
30 /*
31 * This is the Nuspell Enchant Backend.
32 * Nuspell is by Dimitrij Mijoski and Sander van Geloven.
33 * See: http://nuspell.github.io/
34 */
35
36 #include "config.h"
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41
42 #include <string>
43 #include <vector>
44
45 #include "enchant-provider.h"
46 #include "unused-parameter.h"
47
48 #include <nuspell/dictionary.hxx>
49 #include <nuspell/finder.hxx>
50
51 #include <glib.h>
52
53 using namespace std;
54 using namespace nuspell;
55
56 /***************************************************************************/
57
58 class NuspellChecker
59 {
60 public:
61 bool checkWord (const char *word, size_t len);
62 char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs);
63
64 bool requestDictionary (const char * szLang);
65
66 private:
67 Dictionary nuspell;
68 };
69
70 /***************************************************************************/
71
72 bool
checkWord(const char * utf8Word,size_t len)73 NuspellChecker::checkWord(const char *utf8Word, size_t len)
74 {
75 // the 8-bit encodings use precomposed forms
76 char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
77 auto ret = nuspell.spell(normalizedWord);
78 g_free(normalizedWord);
79 return ret;
80 }
81
82 char**
suggestWord(const char * const utf8Word,size_t len,size_t * nsug)83 NuspellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsug)
84 {
85 // the 8-bit encodings use precomposed forms
86 char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
87 auto suggestions = vector<string>();
88 nuspell.suggest(normalizedWord, suggestions);
89 g_free(normalizedWord);
90 if (suggestions.empty())
91 return nullptr;
92 *nsug = suggestions.size();
93 char **sug = g_new0 (char *, *nsug + 1);
94 size_t i = 0;
95 for (auto& suggest : suggestions) {
96 char *word = g_new0(char, suggest.size() + 1);
97 strcpy(word, suggest.c_str());
98 sug[i] = word;
99 i++;
100 }
101 return sug;
102 }
103
104 static void
s_buildDictionaryDirs(vector<string> & dirs)105 s_buildDictionaryDirs (vector<string> & dirs)
106 {
107 dirs.clear ();
108
109 /* 1. personal overrides for Enchant
110 * ~/.config/enchant/nuspell
111 */
112 gchar * tmp;
113 char * config_dir = enchant_get_user_config_dir ();
114 tmp = g_build_filename (config_dir, "nuspell", nullptr);
115 dirs.push_back (tmp);
116 free (config_dir);
117 g_free(tmp);
118
119 /* Dynamically retrieved from Nuspell dictionary finder:
120 * 2. personal overrides for Hunspell
121 * $XDG_DATA_HOME/hunspell
122 * $XDG_DATA_HOME by default is $HOME/.local/share/
123 * 3. system installed for Hunspell
124 * $XDG_DATA_DIRS/hunspell
125 * $XDG_DATA_DIRS/myspell (needed for Fedora)
126 * $XDG_DATA_DIRS by default are /usr/local/share and /usr/share
127 */
128 nuspell::append_default_dir_paths(dirs);
129
130 /* 5. system installations by Enchant
131 * /usr/local/share/enchant/nuspell
132 * /usr/share/enchant/nuspell
133 */
134 char * enchant_prefix = enchant_get_prefix_dir();
135 if (enchant_prefix) {
136 tmp = g_build_filename(enchant_prefix, "share", "enchant", "nuspell", nullptr);
137 dirs.push_back (tmp);
138 g_free(enchant_prefix);
139 g_free(tmp);
140 }
141
142 /* Hunspell paths are used, therefore ENCHANT_NUSPELL_DICT_DIR is
143 * irrelevant. Hence, the following paths are not to be considered:
144 * /usr/local/share/nuspell and /usr/share/nuspell
145 */
146 }
147
148 static void
s_buildHashNames(vector<string> & names,const char * dict)149 s_buildHashNames (vector<string> & names, const char * dict)
150 {
151 names.clear ();
152
153 vector<string> dirs;
154 s_buildDictionaryDirs (dirs);
155
156 char *dict_dic = g_strconcat(dict, ".dic", nullptr);
157 for (size_t i = 0; i < dirs.size(); i++) {
158 char *tmp = g_build_filename (dirs[i].c_str(), dict_dic, nullptr);
159 names.push_back (tmp);
160 g_free (tmp);
161 }
162
163 g_free(dict_dic);
164 }
165
/**
 * Derive the name of the affix file that belongs to a dictionary file.
 *
 * @param dicFile  path or name of a dictionary file
 * @return dicFile with a trailing ".dic" replaced by ".aff"; when dicFile
 *         does not end in ".dic" it is returned unchanged
 */
static const std::string
s_correspondingAffFile(const std::string & dicFile)
{
	static const char dic_ext[] = ".dic";
	const std::string::size_type ext_len = sizeof dic_ext - 1;

	const bool has_dic_ext =
		dicFile.size() >= ext_len &&
		dicFile.compare(dicFile.size() - ext_len, ext_len, dic_ext) == 0;
	if (!has_dic_ext)
		return dicFile;

	// Keep the dot and swap only the three letters after it.
	return dicFile.substr(0, dicFile.size() - 3) + "aff";
}
176
177 static bool
s_fileExists(const string & file)178 s_fileExists(const string & file)
179 {
180 return g_file_test(file.c_str(), G_FILE_TEST_EXISTS) != 0;
181 }
182
/**
 * Decide whether a directory entry could be a dictionary for a language tag.
 *
 * An entry is plausible when it ends in ".dic", starts with the tag, and the
 * character directly after the tag is punctuation.
 *
 * @param dir_entry  file name (without directory) to examine
 * @param tag        requested language tag, e.g. "fi"
 * @return true when the entry matches the rules above
 */
static bool is_plausible_dict_for_tag(const char *dir_entry, const char *tag)
{
	const char *dic_suffix = ".dic";
	const size_t dic_suffix_len = strlen(dic_suffix);
	const size_t dir_entry_len = strlen(dir_entry);
	const size_t tag_len = strlen(tag);

	// Compare by addition: subtracting the suffix length from a shorter
	// entry would wrap around and let the suffix check read before the
	// start of dir_entry.
	if (dir_entry_len < tag_len + dic_suffix_len)
		return false;
	if (strcmp(dir_entry + dir_entry_len - dic_suffix_len, dic_suffix) != 0)
		return false;
	if (strncmp (dir_entry, tag, tag_len) != 0)
		return false;
	//e.g. requested dict for "fi",
	//reject "fil_PH.dic"
	//allow "fi-FOO.dic", "fi_FOO.dic", "fi.dic", etc.
	// ispunct() is only defined for unsigned char values and EOF.
	if (!ispunct(static_cast<unsigned char>(dir_entry[tag_len])))
		return false;
	return true;
}
203
204 static char *
nuspell_request_dictionary(const char * tag)205 nuspell_request_dictionary (const char * tag)
206 {
207 vector<string> names;
208
209 s_buildHashNames (names, tag);
210
211 for (size_t i = 0; i < names.size (); i++) {
212 if (g_file_test(names[i].c_str(), G_FILE_TEST_EXISTS) &&
213 s_fileExists(s_correspondingAffFile(names[i]))) {
214 return strdup (names[i].c_str());
215 }
216 }
217
218 vector<string> dirs;
219 s_buildDictionaryDirs (dirs);
220
221 for (size_t i = 0; i < dirs.size(); i++) {
222 GDir *dir = g_dir_open (dirs[i].c_str(), 0, nullptr);
223 if (dir) {
224 const char *dir_entry;
225 while ((dir_entry = g_dir_read_name (dir)) != NULL) {
226 if (is_plausible_dict_for_tag(dir_entry, tag)) {
227 char *dict = g_build_filename (dirs[i].c_str(),
228 dir_entry, nullptr);
229 if(s_fileExists(s_correspondingAffFile(dict))) {
230 g_dir_close (dir);
231 return dict;
232 }
233 g_free(dict);
234 }
235 }
236
237 g_dir_close (dir);
238 }
239 }
240
241 return NULL;
242 }
243
244 bool
requestDictionary(const char * szLang)245 NuspellChecker::requestDictionary(const char *szLang)
246 {
247 char *dic = nuspell_request_dictionary (szLang);
248 if (!dic)
249 return false;
250 string aff(s_correspondingAffFile(dic));
251 if (!s_fileExists(aff))
252 return false;
253 auto path = string(dic);
254 free(dic);
255 if (path.size() >= 4 && path.compare(path.size() - 4, 4, ".dic") == 0)
256 path.erase(path.size() - 4);
257 else
258 return false;
259 try {
260 nuspell = nuspell::Dictionary::load_from_path(path);
261 } catch (const std::runtime_error& e) {
262 return false;
263 }
264
265 return true;
266 }
267
268 /*
269 * Enchant
270 */
271
272 static char **
nuspell_dict_suggest(EnchantDict * me,const char * const word,size_t len,size_t * out_n_suggs)273 nuspell_dict_suggest (EnchantDict * me, const char *const word,
274 size_t len, size_t * out_n_suggs)
275 {
276 NuspellChecker * checker = static_cast<NuspellChecker *>(me->user_data);
277 return checker->suggestWord (word, len, out_n_suggs);
278 }
279
280 static int
nuspell_dict_check(EnchantDict * me,const char * const word,size_t len)281 nuspell_dict_check (EnchantDict * me, const char *const word, size_t len)
282 {
283 NuspellChecker * checker = static_cast<NuspellChecker *>(me->user_data);
284
285 return !(checker->checkWord(word, len));
286 }
287
288 static int
nuspell_dict_is_word_character(EnchantDict * me _GL_UNUSED_PARAMETER,uint32_t uc,size_t n _GL_UNUSED_PARAMETER)289 nuspell_dict_is_word_character (EnchantDict * me _GL_UNUSED_PARAMETER,
290 uint32_t uc, size_t n _GL_UNUSED_PARAMETER)
291 {
292 return g_unichar_isalpha(uc);
293 }
294
295 static void
nuspell_provider_enum_dicts(const char * const directory,vector<string> & out_dicts)296 nuspell_provider_enum_dicts (const char * const directory,
297 vector<string> & out_dicts)
298 {
299 GDir * dir = g_dir_open (directory, 0, nullptr);
300 if (dir) {
301 const char * entry;
302 while ((entry = g_dir_read_name (dir)) != NULL) {
303 char * utf8_entry = g_filename_to_utf8 (entry, -1, nullptr, nullptr, nullptr);
304 if (utf8_entry) {
305 string dir_entry (utf8_entry);
306 g_free (utf8_entry);
307
308 int hit = dir_entry.rfind (".dic");
309 // don't include hyphenation dictionaries
310 if (hit != -1) {
311 // require .aff file to be present
312 if(dir_entry.compare (0, 5, "hyph_") != 0) {
313 char * dic = g_build_filename(directory, dir_entry.c_str(), nullptr);
314 if (s_fileExists(s_correspondingAffFile(dic))) {
315 out_dicts.push_back (dir_entry.substr (0, hit));
316 }
317 g_free(dic);
318 }
319 }
320 }
321 }
322
323 g_dir_close (dir);
324 }
325 }
326
327 extern "C" {
328
329 static char **
nuspell_provider_list_dicts(EnchantProvider * me _GL_UNUSED_PARAMETER,size_t * out_n_dicts)330 nuspell_provider_list_dicts (EnchantProvider * me _GL_UNUSED_PARAMETER,
331 size_t * out_n_dicts)
332 {
333 vector<string> dict_dirs, dicts;
334 char ** dictionary_list = NULL;
335
336 s_buildDictionaryDirs (dict_dirs);
337
338 for (size_t i = 0; i < dict_dirs.size(); i++) {
339 nuspell_provider_enum_dicts (dict_dirs[i].c_str(), dicts);
340 }
341
342 if (dicts.size () > 0) {
343 dictionary_list = g_new0 (char *, dicts.size() + 1);
344
345 for (size_t i = 0; i < dicts.size(); i++)
346 dictionary_list[i] = g_strdup (dicts[i].c_str());
347 }
348
349 *out_n_dicts = dicts.size ();
350 return dictionary_list;
351 }
352
353 static EnchantDict *
nuspell_provider_request_dict(EnchantProvider * me _GL_UNUSED_PARAMETER,const char * const tag)354 nuspell_provider_request_dict(EnchantProvider * me _GL_UNUSED_PARAMETER, const char *const tag)
355 {
356 NuspellChecker * checker = new NuspellChecker();
357
358 if (!checker)
359 return NULL;
360
361 if (!checker->requestDictionary(tag)) {
362 delete checker;
363 return NULL;
364 }
365
366 EnchantDict *dict = g_new0(EnchantDict, 1);
367 dict->user_data = (void *) checker;
368 dict->check = nuspell_dict_check;
369 dict->suggest = nuspell_dict_suggest;
370 // don't implement personal, session
371 dict->is_word_character = nuspell_dict_is_word_character;
372
373 return dict;
374 }
375
376 static void
nuspell_provider_dispose_dict(EnchantProvider * me _GL_UNUSED_PARAMETER,EnchantDict * dict)377 nuspell_provider_dispose_dict (EnchantProvider * me _GL_UNUSED_PARAMETER, EnchantDict * dict)
378 {
379 NuspellChecker *checker = (NuspellChecker *) dict->user_data;
380 delete checker;
381
382 g_free (dict);
383 }
384
385 static int
nuspell_provider_dictionary_exists(struct str_enchant_provider * me _GL_UNUSED_PARAMETER,const char * const tag)386 nuspell_provider_dictionary_exists (struct str_enchant_provider * me _GL_UNUSED_PARAMETER,
387 const char *const tag)
388 {
389 vector <string> names;
390 s_buildHashNames (names, tag);
391 for (size_t i = 0; i < names.size(); i++) {
392 if (g_file_test (names[i].c_str(), G_FILE_TEST_EXISTS) &&
393 s_fileExists(s_correspondingAffFile(names[i]))) {
394 return 1;
395 }
396 }
397
398 return 0;
399 }
400
401 static void
nuspell_provider_dispose(EnchantProvider * me)402 nuspell_provider_dispose (EnchantProvider * me)
403 {
404 g_free (me);
405 }
406
407 static const char *
nuspell_provider_identify(EnchantProvider * me _GL_UNUSED_PARAMETER)408 nuspell_provider_identify (EnchantProvider * me _GL_UNUSED_PARAMETER)
409 {
410 return "nuspell";
411 }
412
413 static const char *
nuspell_provider_describe(EnchantProvider * me _GL_UNUSED_PARAMETER)414 nuspell_provider_describe (EnchantProvider * me _GL_UNUSED_PARAMETER)
415 {
416 return "Nuspell Provider";
417 }
418
419 EnchantProvider *init_enchant_provider (void);
420
421 EnchantProvider *
init_enchant_provider(void)422 init_enchant_provider (void)
423 {
424 EnchantProvider *provider = g_new0(EnchantProvider, 1);
425 provider->dispose = nuspell_provider_dispose;
426 provider->request_dict = nuspell_provider_request_dict;
427 provider->dispose_dict = nuspell_provider_dispose_dict;
428 provider->dictionary_exists = nuspell_provider_dictionary_exists;
429 provider->identify = nuspell_provider_identify;
430 provider->describe = nuspell_provider_describe;
431 provider->list_dicts = nuspell_provider_list_dicts;
432
433 return provider;
434 }
435
436 } // extern C linkage
437