1 /* enchant
2  * Copyright (C) 2020 Sander van Geloven
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18  *
19  * In addition, as a special exception, Dom Lachowicz
20  * gives permission to link the code of this program with
21  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
22  * spell checker backend) and distribute linked combinations including
23  * the two.  You must obey the GNU General Public License in all
24  * respects for all of the code used other than said providers.  If you modify
25  * this file, you may extend this exception to your version of the
26  * file, but you are not obligated to do so.  If you do not wish to
27  * do so, delete this exception statement from your version.
28  */
29 
30 /*
31  * This is the Nuspell Enchant Backend.
32  * Nuspell is by Dimitrij Mijoski and Sander van Geloven.
33  * See: http://nuspell.github.io/
34  */
35 
36 #include "config.h"
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 
42 #include <string>
43 #include <vector>
44 
45 #include "enchant-provider.h"
46 #include "unused-parameter.h"
47 
48 #include <nuspell/dictionary.hxx>
49 #include <nuspell/finder.hxx>
50 
51 #include <glib.h>
52 
53 using namespace std;
54 using namespace nuspell;
55 
56 /***************************************************************************/
57 
58 class NuspellChecker
59 {
60 public:
61 	bool checkWord (const char *word, size_t len);
62 	char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs);
63 
64 	bool requestDictionary (const char * szLang);
65 
66 private:
67 	Dictionary nuspell;
68 };
69 
70 /***************************************************************************/
71 
72 bool
checkWord(const char * utf8Word,size_t len)73 NuspellChecker::checkWord(const char *utf8Word, size_t len)
74 {
75 	// the 8-bit encodings use precomposed forms
76 	char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
77 	auto ret = nuspell.spell(normalizedWord);
78 	g_free(normalizedWord);
79 	return ret;
80 }
81 
82 char**
suggestWord(const char * const utf8Word,size_t len,size_t * nsug)83 NuspellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsug)
84 {
85 	// the 8-bit encodings use precomposed forms
86 	char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
87 	auto suggestions = vector<string>();
88 	nuspell.suggest(normalizedWord, suggestions);
89 	g_free(normalizedWord);
90 	if (suggestions.empty())
91 		return nullptr;
92 	*nsug = suggestions.size();
93 	char **sug = g_new0 (char *, *nsug + 1);
94 	size_t i = 0;
95 	for (auto& suggest : suggestions) {
96 		char *word = g_new0(char, suggest.size() + 1);
97 		strcpy(word, suggest.c_str());
98 		sug[i] = word;
99 		i++;
100 	}
101 	return sug;
102 }
103 
104 static void
s_buildDictionaryDirs(vector<string> & dirs)105 s_buildDictionaryDirs (vector<string> & dirs)
106 {
107 	dirs.clear ();
108 
109 	/* 1. personal overrides for Enchant
110 	 *    ~/.config/enchant/nuspell
111 	 */
112 	gchar * tmp;
113 	char * config_dir = enchant_get_user_config_dir ();
114 	tmp = g_build_filename (config_dir, "nuspell", nullptr);
115 	dirs.push_back (tmp);
116 	free (config_dir);
117 	g_free(tmp);
118 
119 	/* Dynamically retrieved from Nuspell dictionary finder:
120 	 * 2. personal overrides for Hunspell
121 	 *    $XDG_DATA_HOME/hunspell
122 	 *    $XDG_DATA_HOME by default is $HOME/.local/share/
123 	 * 3. system installed for Hunspell
124 	 *    $XDG_DATA_DIRS/hunspell
125 	 *    $XDG_DATA_DIRS/myspell (needed for Fedora)
126 	 *    $XDG_DATA_DIRS by default are /usr/local/share and /usr/share
127 	 */
128 	nuspell::append_default_dir_paths(dirs);
129 
130 	/* 5. system installations by Enchant
131 	 *    /usr/local/share/enchant/nuspell
132 	 *    /usr/share/enchant/nuspell
133 	 */
134 	char * enchant_prefix = enchant_get_prefix_dir();
135 	if (enchant_prefix) {
136 		tmp = g_build_filename(enchant_prefix, "share", "enchant", "nuspell", nullptr);
137 		dirs.push_back (tmp);
138 		g_free(enchant_prefix);
139 		g_free(tmp);
140 	}
141 
142 	/* Hunspell paths are used, therefore ENCHANT_NUSPELL_DICT_DIR is
143 	 * irrelevant. Hence, the following paths are not to be considered:
144 	 * /usr/local/share/nuspell and /usr/share/nuspell
145 	 */
146 }
147 
148 static void
s_buildHashNames(vector<string> & names,const char * dict)149 s_buildHashNames (vector<string> & names, const char * dict)
150 {
151 	names.clear ();
152 
153 	vector<string> dirs;
154 	s_buildDictionaryDirs (dirs);
155 
156 	char *dict_dic = g_strconcat(dict, ".dic", nullptr);
157 	for (size_t i = 0; i < dirs.size(); i++) {
158 		char *tmp = g_build_filename (dirs[i].c_str(), dict_dic, nullptr);
159 		names.push_back (tmp);
160 		g_free (tmp);
161 	}
162 
163 	g_free(dict_dic);
164 }
165 
/*
 * Map a dictionary file name to the name of its affix file.
 *
 * When dicFile ends in ".dic" the result is the same name ending in ".aff";
 * any other name is returned unchanged.
 */
static const std::string
s_correspondingAffFile(const std::string & dicFile)
{
	static const char dic_ext[] = ".dic";
	const size_t ext_len = sizeof dic_ext - 1;
	const size_t total = dicFile.size();

	const bool has_dic_ext = total >= ext_len &&
		dicFile.compare(total - ext_len, ext_len, dic_ext) == 0;
	if (!has_dic_ext)
		return dicFile;

	// drop the old extension and attach the affix one
	return dicFile.substr(0, total - ext_len) + ".aff";
}
176 
177 static bool
s_fileExists(const string & file)178 s_fileExists(const string & file)
179 {
180 	return g_file_test(file.c_str(), G_FILE_TEST_EXISTS) != 0;
181 }
182 
/*
 * Decide whether the directory entry dir_entry could be a dictionary file
 * for the language tag.
 *
 * The entry qualifies when it ends in ".dic", starts with tag, and the
 * character right after the tag is punctuation.
 */
static bool is_plausible_dict_for_tag(const char *dir_entry, const char *tag)
{
	const char *dic_suffix = ".dic";
	const size_t dic_suffix_len = strlen(dic_suffix);
	const size_t dir_entry_len = strlen(dir_entry);
	const size_t tag_len = strlen(tag);

	// Compare sums instead of subtracting: the lengths are unsigned, so a
	// subtraction would wrap around for entries shorter than the suffix and
	// the suffix comparison below would then read before the string.
	if (dir_entry_len < tag_len + dic_suffix_len)
		return false;
	if (strcmp(dir_entry + dir_entry_len - dic_suffix_len, dic_suffix) != 0)
		return false;
	if (strncmp (dir_entry, tag, tag_len) != 0)
		return false;
	//e.g. requested dict for "fi",
	//reject "fil_PH.dic"
	//allow "fi-FOO.dic", "fi_FOO.dic", "fi.dic", etc.
	// (the cast keeps bytes >= 0x80 from reaching ispunct() as negative values)
	if (!ispunct(static_cast<unsigned char>(dir_entry[tag_len])))
		return false;
	return true;
}
203 
204 static char *
nuspell_request_dictionary(const char * tag)205 nuspell_request_dictionary (const char * tag)
206 {
207 	vector<string> names;
208 
209 	s_buildHashNames (names, tag);
210 
211 	for (size_t i = 0; i < names.size (); i++) {
212 		if (g_file_test(names[i].c_str(), G_FILE_TEST_EXISTS) &&
213 		    s_fileExists(s_correspondingAffFile(names[i]))) {
214 			return strdup (names[i].c_str());
215 		}
216 	}
217 
218 	vector<string> dirs;
219 	s_buildDictionaryDirs (dirs);
220 
221 	for (size_t i = 0; i < dirs.size(); i++) {
222 		GDir *dir = g_dir_open (dirs[i].c_str(), 0, nullptr);
223 		if (dir) {
224 			const char *dir_entry;
225 			while ((dir_entry = g_dir_read_name (dir)) != NULL) {
226 				if (is_plausible_dict_for_tag(dir_entry, tag)) {
227 					char *dict = g_build_filename (dirs[i].c_str(),
228 								       dir_entry, nullptr);
229 					if(s_fileExists(s_correspondingAffFile(dict))) {
230 						g_dir_close (dir);
231 						return dict;
232 					}
233 					g_free(dict);
234 				}
235 			}
236 
237 			g_dir_close (dir);
238 		}
239 	}
240 
241 	return NULL;
242 }
243 
244 bool
requestDictionary(const char * szLang)245 NuspellChecker::requestDictionary(const char *szLang)
246 {
247 	char *dic = nuspell_request_dictionary (szLang);
248 	if (!dic)
249 		return false;
250 	string aff(s_correspondingAffFile(dic));
251 	if (!s_fileExists(aff))
252 		return false;
253 	auto path = string(dic);
254 	free(dic);
255 	if (path.size() >= 4 && path.compare(path.size() - 4, 4, ".dic") == 0)
256 		path.erase(path.size() - 4);
257 	else
258 		return false;
259 	try {
260 		nuspell = nuspell::Dictionary::load_from_path(path);
261 	} catch (const std::runtime_error& e) {
262 		return false;
263 	}
264 
265 	return true;
266 }
267 
268 /*
269  * Enchant
270  */
271 
272 static char **
nuspell_dict_suggest(EnchantDict * me,const char * const word,size_t len,size_t * out_n_suggs)273 nuspell_dict_suggest (EnchantDict * me, const char *const word,
274 		      size_t len, size_t * out_n_suggs)
275 {
276 	NuspellChecker * checker = static_cast<NuspellChecker *>(me->user_data);
277 	return checker->suggestWord (word, len, out_n_suggs);
278 }
279 
280 static int
nuspell_dict_check(EnchantDict * me,const char * const word,size_t len)281 nuspell_dict_check (EnchantDict * me, const char *const word, size_t len)
282 {
283 	NuspellChecker * checker = static_cast<NuspellChecker *>(me->user_data);
284 
285 	return !(checker->checkWord(word, len));
286 }
287 
288 static int
nuspell_dict_is_word_character(EnchantDict * me _GL_UNUSED_PARAMETER,uint32_t uc,size_t n _GL_UNUSED_PARAMETER)289 nuspell_dict_is_word_character (EnchantDict * me _GL_UNUSED_PARAMETER,
290 				uint32_t uc, size_t n _GL_UNUSED_PARAMETER)
291 {
292 	return g_unichar_isalpha(uc);
293 }
294 
295 static void
nuspell_provider_enum_dicts(const char * const directory,vector<string> & out_dicts)296 nuspell_provider_enum_dicts (const char * const directory,
297 			     vector<string> & out_dicts)
298 {
299 	GDir * dir = g_dir_open (directory, 0, nullptr);
300 	if (dir) {
301 		const char * entry;
302 		while ((entry = g_dir_read_name (dir)) != NULL) {
303 			char * utf8_entry = g_filename_to_utf8 (entry, -1, nullptr, nullptr, nullptr);
304 			if (utf8_entry) {
305 				string dir_entry (utf8_entry);
306 				g_free (utf8_entry);
307 
308 				int hit = dir_entry.rfind (".dic");
309 				// don't include hyphenation dictionaries
310 				if (hit != -1) {
311 					// require .aff file to be present
312 					if(dir_entry.compare (0, 5, "hyph_") != 0) {
313 						char * dic = g_build_filename(directory, dir_entry.c_str(), nullptr);
314 						if (s_fileExists(s_correspondingAffFile(dic))) {
315 							out_dicts.push_back (dir_entry.substr (0, hit));
316 						}
317 						g_free(dic);
318 					}
319 				}
320 			}
321 		}
322 
323 		g_dir_close (dir);
324 	}
325 }
326 
327 extern "C" {
328 
329 static char **
nuspell_provider_list_dicts(EnchantProvider * me _GL_UNUSED_PARAMETER,size_t * out_n_dicts)330 nuspell_provider_list_dicts (EnchantProvider * me _GL_UNUSED_PARAMETER,
331 			     size_t * out_n_dicts)
332 {
333 	vector<string> dict_dirs, dicts;
334 	char ** dictionary_list = NULL;
335 
336 	s_buildDictionaryDirs (dict_dirs);
337 
338 	for (size_t i = 0; i < dict_dirs.size(); i++) {
339 		nuspell_provider_enum_dicts (dict_dirs[i].c_str(), dicts);
340 	}
341 
342 	if (dicts.size () > 0) {
343 		dictionary_list = g_new0 (char *, dicts.size() + 1);
344 
345 		for (size_t i = 0; i < dicts.size(); i++)
346 			dictionary_list[i] = g_strdup (dicts[i].c_str());
347 	}
348 
349 	*out_n_dicts = dicts.size ();
350 	return dictionary_list;
351 }
352 
353 static EnchantDict *
nuspell_provider_request_dict(EnchantProvider * me _GL_UNUSED_PARAMETER,const char * const tag)354 nuspell_provider_request_dict(EnchantProvider * me _GL_UNUSED_PARAMETER, const char *const tag)
355 {
356 	NuspellChecker * checker = new NuspellChecker();
357 
358 	if (!checker)
359 		return NULL;
360 
361 	if (!checker->requestDictionary(tag)) {
362 		delete checker;
363 		return NULL;
364 	}
365 
366 	EnchantDict *dict = g_new0(EnchantDict, 1);
367 	dict->user_data = (void *) checker;
368 	dict->check = nuspell_dict_check;
369 	dict->suggest = nuspell_dict_suggest;
370 	// don't implement personal, session
371 	dict->is_word_character = nuspell_dict_is_word_character;
372 
373 	return dict;
374 }
375 
376 static void
nuspell_provider_dispose_dict(EnchantProvider * me _GL_UNUSED_PARAMETER,EnchantDict * dict)377 nuspell_provider_dispose_dict (EnchantProvider * me _GL_UNUSED_PARAMETER, EnchantDict * dict)
378 {
379 	NuspellChecker *checker = (NuspellChecker *) dict->user_data;
380 	delete checker;
381 
382 	g_free (dict);
383 }
384 
385 static int
nuspell_provider_dictionary_exists(struct str_enchant_provider * me _GL_UNUSED_PARAMETER,const char * const tag)386 nuspell_provider_dictionary_exists (struct str_enchant_provider * me _GL_UNUSED_PARAMETER,
387 				    const char *const tag)
388 {
389 	vector <string> names;
390 	s_buildHashNames (names, tag);
391 	for (size_t i = 0; i < names.size(); i++) {
392 		if (g_file_test (names[i].c_str(), G_FILE_TEST_EXISTS) &&
393 		    s_fileExists(s_correspondingAffFile(names[i]))) {
394 			return 1;
395 		}
396 	}
397 
398 	return 0;
399 }
400 
401 static void
nuspell_provider_dispose(EnchantProvider * me)402 nuspell_provider_dispose (EnchantProvider * me)
403 {
404 	g_free (me);
405 }
406 
407 static const char *
nuspell_provider_identify(EnchantProvider * me _GL_UNUSED_PARAMETER)408 nuspell_provider_identify (EnchantProvider * me _GL_UNUSED_PARAMETER)
409 {
410 	return "nuspell";
411 }
412 
413 static const char *
nuspell_provider_describe(EnchantProvider * me _GL_UNUSED_PARAMETER)414 nuspell_provider_describe (EnchantProvider * me _GL_UNUSED_PARAMETER)
415 {
416 	return "Nuspell Provider";
417 }
418 
419 EnchantProvider *init_enchant_provider (void);
420 
421 EnchantProvider *
init_enchant_provider(void)422 init_enchant_provider (void)
423 {
424 	EnchantProvider *provider = g_new0(EnchantProvider, 1);
425 	provider->dispose = nuspell_provider_dispose;
426 	provider->request_dict = nuspell_provider_request_dict;
427 	provider->dispose_dict = nuspell_provider_dispose_dict;
428 	provider->dictionary_exists = nuspell_provider_dictionary_exists;
429 	provider->identify = nuspell_provider_identify;
430 	provider->describe = nuspell_provider_describe;
431 	provider->list_dicts = nuspell_provider_list_dicts;
432 
433 	return provider;
434 }
435 
436 } // extern C linkage
437