1 /**
2  * \file AspellChecker.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Kevin Atkinson
7  * \author John Levon
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11 
12 #include <config.h>
13 
14 #include "AspellChecker.h"
15 #include "PersonalWordList.h"
16 
17 #include "LyXRC.h"
18 #include "WordLangTuple.h"
19 
20 #include "support/lassert.h"
21 #include "support/debug.h"
22 #include "support/lstrings.h"
23 #include "support/docstring_list.h"
24 
25 #include "support/filetools.h"
26 #include "support/Package.h"
27 #include "support/FileName.h"
28 #include "support/PathChanger.h"
29 
30 #include <aspell.h>
31 
32 #include <map>
33 #include <string>
34 
35 using namespace std;
36 using namespace lyx::support;
37 
38 namespace lyx {
39 
40 namespace {
41 
42 struct Speller {
43 	AspellConfig * config;
44 	AspellCanHaveError * e_speller;
45 	bool accept_compound;
46 	docstring_list ignored_words_;
47 };
48 
49 typedef std::map<std::string, Speller> Spellers;
50 typedef map<std::string, PersonalWordList *> LangPersonalWordList;
51 
52 } // namespace
53 
54 struct AspellChecker::Private
55 {
Privatelyx::AspellChecker::Private56 	Private()
57 	{}
58 
59 	~Private();
60 
61 	/// add a speller of the given language and variety
62 	AspellSpeller * addSpeller(Language const * lang);
63 
64 	///
65 	AspellSpeller * speller(Language const * lang);
66 
67 	bool isValidDictionary(AspellConfig * config,
68 			string const & lang, string const & variety);
69 	int numDictionaries() const;
70 	bool checkAspellData(AspellConfig * config,
71 		string const & basepath, string const & datapath, string const & dictpath,
72 		string const & lang, string const & variety);
73 	AspellConfig * getConfig(string const & lang, string const & variety);
74 
75 	string toAspellWord(docstring const & word) const;
76 
77 	SpellChecker::Result check(AspellSpeller * m,
78 		WordLangTuple const & word) const;
79 
80 	void initSessionDictionary(Speller const & speller, PersonalWordList * pd);
81 	void addToSession(AspellCanHaveError * speller, docstring const & word);
82 	void insert(WordLangTuple const & word);
83 	void remove(WordLangTuple const & word);
84 	bool learned(WordLangTuple const & word);
85 
86 	void accept(Speller & speller, WordLangTuple const & word);
87 
88 	/// the spellers
89 	Spellers spellers_;
90 
91 	LangPersonalWordList personal_;
92 
93 	/// the location below system/user directory
94 	/// there the rws files lookup will happen
dictDirectorylyx::AspellChecker::Private95 	const string dictDirectory(void)
96 	{
97 		return "dicts";
98 	}
99 	/// there the dat+cmap files lookup will happen
dataDirectorylyx::AspellChecker::Private100 	const string dataDirectory(void)
101 	{
102 		return "data";
103 	}
104 	/// os package directory constants
105 	/// macports on Mac OS X or
106 	/// aspell rpms on Linux
osPackageBaselyx::AspellChecker::Private107 	const string osPackageBase(void)
108 	{
109 #ifdef USE_MACOSX_PACKAGING
110 		return "/opt/local";
111 #else
112 		return "/usr";
113 #endif
114 	}
osPackageDictDirectorylyx::AspellChecker::Private115 	const string osPackageDictDirectory(void)
116 	{
117 #ifdef USE_MACOSX_PACKAGING
118 		return "/share/aspell";
119 #else
120 		return "/lib/aspell-0.60";
121 #endif
122 	}
osPackageDataDirectorylyx::AspellChecker::Private123 	const string osPackageDataDirectory(void)
124 	{
125 		return "/lib/aspell-0.60";
126 	}
127 };
128 
129 
~Private()130 AspellChecker::Private::~Private()
131 {
132 	Spellers::iterator it = spellers_.begin();
133 	Spellers::iterator end = spellers_.end();
134 
135 	for (; it != end; ++it) {
136 		if (it->second.e_speller) {
137 			AspellSpeller * speller = to_aspell_speller(it->second.e_speller);
138 			aspell_speller_save_all_word_lists(speller);
139 			delete_aspell_can_have_error(it->second.e_speller);
140 		}
141 		delete_aspell_config(it->second.config);
142 	}
143 
144 	LangPersonalWordList::const_iterator pdit = personal_.begin();
145 	LangPersonalWordList::const_iterator pdet = personal_.end();
146 
147 	for (; pdit != pdet; ++pdit) {
148 		if (0 == pdit->second)
149 			continue;
150 		PersonalWordList * pd = pdit->second;
151 		pd->save();
152 		delete pd;
153 	}
154 }
155 
156 
isValidDictionary(AspellConfig * config,string const & lang,string const & variety)157 bool AspellChecker::Private::isValidDictionary(AspellConfig * config,
158 		string const & lang, string const & variety)
159 {
160 	bool have = false;
161 	// code taken from aspell's list-dicts example
162 	// the returned pointer should _not_ need to be deleted
163 	AspellDictInfoList * dlist = get_aspell_dict_info_list(config);
164 	AspellDictInfoEnumeration * dels = aspell_dict_info_list_elements(dlist);
165 	const AspellDictInfo * entry;
166 
167 	while (0 != (entry = aspell_dict_info_enumeration_next(dels))) {
168 		LYXERR(Debug::DEBUG, "aspell dict:"
169 			<< " name="    << entry->name
170 			<< ",code="    << entry->code
171 			<< ",variety=" << entry->jargon);
172 		if (entry->code == lang && (variety.empty() || entry->jargon == variety)) {
173 			have = true;
174 			break;
175 		}
176 	}
177 	delete_aspell_dict_info_enumeration(dels);
178 	LYXERR(Debug::FILES, "aspell dictionary: " << lang << (have ? " yes" : " no"));
179 	return have;
180 }
181 
182 
checkAspellData(AspellConfig * config,string const & basepath,string const & datapath,string const & dictpath,string const & lang,string const & variety)183 bool AspellChecker::Private::checkAspellData(AspellConfig * config,
184 	string const & basepath, string const & datapath, string const & dictpath,
185 	string const & lang, string const & variety)
186 {
187 	FileName base(basepath);
188 	bool have_dict = base.isDirectory();
189 
190 	if (have_dict) {
191 		FileName data(addPath(base.absFileName(), datapath));
192 		FileName dict(addPath(base.absFileName(), dictpath));
193 		have_dict = dict.isDirectory() && data.isDirectory();
194 		if (have_dict) {
195 			LYXERR(Debug::FILES, "aspell dict-dir: " << dict);
196 			LYXERR(Debug::FILES, "aspell data-dir: " << data);
197 			aspell_config_replace(config, "dict-dir", dict.absFileName().c_str());
198 			aspell_config_replace(config, "data-dir", data.absFileName().c_str());
199 			have_dict = isValidDictionary(config, lang, variety);
200 		}
201 	}
202 	return have_dict;
203 }
204 
205 
getConfig(string const & lang,string const & variety)206 AspellConfig * AspellChecker::Private::getConfig(string const & lang, string const & variety)
207 {
208 	AspellConfig * config = new_aspell_config();
209 	bool have_dict = false;
210 	string const sysdir = lyx::support::package().system_support().absFileName();
211 	string const userdir = lyx::support::package().user_support().absFileName();
212 
213 	LYXERR(Debug::FILES, "aspell user dir: " << userdir);
214 	have_dict = checkAspellData(config, userdir, dataDirectory(), dictDirectory(), lang, variety);
215 	if (!have_dict) {
216 		LYXERR(Debug::FILES, "aspell sysdir dir: " << sysdir);
217 		have_dict = checkAspellData(config, sysdir, dataDirectory(), dictDirectory(), lang, variety);
218 	}
219 	if (!have_dict) {
220 		// check for package data of OS installation
221 		checkAspellData(config, osPackageBase(), osPackageDataDirectory(), osPackageDictDirectory(), lang, variety);
222 	}
223 	return config;
224 }
225 
226 
addToSession(AspellCanHaveError * speller,docstring const & word)227 void AspellChecker::Private::addToSession(AspellCanHaveError * speller, docstring const & word)
228 {
229 	string const word_to_add = toAspellWord(word);
230 	if (1 != aspell_speller_add_to_session(to_aspell_speller(speller), word_to_add.c_str(), -1))
231 		LYXERR(Debug::GUI, "aspell add to session: " << aspell_error_message(speller));
232 }
233 
234 
initSessionDictionary(Speller const & speller,PersonalWordList * pd)235 void AspellChecker::Private::initSessionDictionary(
236 	Speller const & speller,
237 	PersonalWordList * pd)
238 {
239 	AspellSpeller * aspell = to_aspell_speller(speller.e_speller);
240 	aspell_speller_clear_session(aspell);
241 	docstring_list::const_iterator it = pd->begin();
242 	docstring_list::const_iterator et = pd->end();
243 	for (; it != et; ++it) {
244 		addToSession(speller.e_speller, *it);
245 	}
246 	it = speller.ignored_words_.begin();
247 	et = speller.ignored_words_.end();
248 	for (; it != et; ++it) {
249 		addToSession(speller.e_speller, *it);
250 	}
251 }
252 
253 
addSpeller(Language const * lang)254 AspellSpeller * AspellChecker::Private::addSpeller(Language const * lang)
255 {
256 	Speller m;
257 	string const code = lang->code();
258 	string const variety = lang->variety();
259 	m.config = getConfig(code, variety);
260 	// Aspell supports both languages and varieties (such as German
261 	// old vs. new spelling). The respective naming convention is
262 	// lang_REGION-variety (e.g. de_DE-alt).
263 	aspell_config_replace(m.config, "lang", code.c_str());
264 	if (!variety.empty())
265 		aspell_config_replace(m.config, "variety", variety.c_str());
266 	// Set the encoding to utf-8.
267 	// aspell does also understand "ucs-4", so we would not need a
268 	// conversion in theory, but if this is used it expects all
269 	// char const * arguments to be a cast from  uint const *, and it
270 	// seems that this uint is not compatible with our char_type on some
271 	// platforms (cygwin, OS X). Therefore we use utf-8, that does
272 	// always work.
273 	aspell_config_replace(m.config, "encoding", "utf-8");
274 	if (lyxrc.spellchecker_accept_compound)
275 		// Consider run-together words as legal compounds
276 		aspell_config_replace(m.config, "run-together", "true");
277 	else
278 		// Report run-together words as errors
279 		aspell_config_replace(m.config, "run-together", "false");
280 
281 	m.accept_compound = lyxrc.spellchecker_accept_compound;
282 	m.e_speller = new_aspell_speller(m.config);
283 	if (aspell_error_number(m.e_speller) != 0) {
284 		// FIXME: We should indicate somehow that this language is not supported.
285 		LYXERR(Debug::FILES, "aspell error: " << aspell_error_message(m.e_speller));
286 		delete_aspell_can_have_error(m.e_speller);
287 		delete_aspell_config(m.config);
288 		m.config = 0;
289 		m.e_speller = 0;
290 	} else {
291 		PersonalWordList * pd = new PersonalWordList(lang->lang());
292 		pd->load();
293 		personal_[lang->lang()] = pd;
294 		initSessionDictionary(m, pd);
295 	}
296 
297 	spellers_[lang->lang()] = m;
298 	return m.e_speller ? to_aspell_speller(m.e_speller) : 0;
299 }
300 
301 
speller(Language const * lang)302 AspellSpeller * AspellChecker::Private::speller(Language const * lang)
303 {
304 	Spellers::iterator it = spellers_.find(lang->lang());
305 	if (it != spellers_.end()) {
306 		Speller aspell = it->second;
307 		if (lyxrc.spellchecker_accept_compound != aspell.accept_compound) {
308 			// spell checker setting changed... adjust run-together
309 			aspell.accept_compound = lyxrc.spellchecker_accept_compound;
310 			if (aspell.accept_compound)
311 				// Consider run-together words as legal compounds
312 				aspell_config_replace(aspell.config, "run-together", "true");
313 			else
314 				// Report run-together words as errors
315 				aspell_config_replace(aspell.config, "run-together", "false");
316 			AspellCanHaveError * e_speller = aspell.e_speller;
317 			aspell.e_speller = new_aspell_speller(aspell.config);
318 			delete_aspell_speller(to_aspell_speller(e_speller));
319 			spellers_[lang->lang()] = aspell;
320 		}
321 		return to_aspell_speller(aspell.e_speller);
322 	}
323 
324 	return addSpeller(lang);
325 }
326 
327 
numDictionaries() const328 int AspellChecker::Private::numDictionaries() const
329 {
330 	int result = 0;
331 	Spellers::const_iterator it = spellers_.begin();
332 	Spellers::const_iterator et = spellers_.end();
333 
334 	for (; it != et; ++it) {
335 		Speller aspell = it->second;
336 		result += aspell.e_speller != 0;
337 	}
338 	return result;
339 }
340 
341 
toAspellWord(docstring const & word) const342 string AspellChecker::Private::toAspellWord(docstring const & word) const
343 {
344 	size_t mpos;
345 	string word_str = to_utf8(word);
346 	while ((mpos = word_str.find('-')) != word_str.npos) {
347 		word_str.erase(mpos, 1);
348 	}
349 	return word_str;
350 }
351 
352 
check(AspellSpeller * m,WordLangTuple const & word) const353 SpellChecker::Result AspellChecker::Private::check(
354 	AspellSpeller * m, WordLangTuple const & word)
355 	const
356 {
357 	SpellChecker::Result result = WORD_OK;
358 	docstring w1;
359 	LYXERR(Debug::GUI, "spellCheck: \"" <<
360 		   word.word() << "\", lang = " << word.lang()->lang()) ;
361 	docstring rest = split(word.word(), w1, '-');
362 	for (; result == WORD_OK;) {
363 		string const word_str = toAspellWord(w1);
364 		int const word_ok = aspell_speller_check(m, word_str.c_str(), -1);
365 		LASSERT(word_ok != -1, return UNKNOWN_WORD);
366 		result = (word_ok) ? WORD_OK : UNKNOWN_WORD;
367 		if (rest.empty())
368 			break;
369 		rest = split(rest, w1, '-');
370 	}
371 	if (result == WORD_OK)
372 		return result;
373 	string const word_str = toAspellWord(word.word());
374 	int const word_ok = aspell_speller_check(m, word_str.c_str(), -1);
375 	LASSERT(word_ok != -1, return UNKNOWN_WORD);
376 	return (word_ok) ? WORD_OK : UNKNOWN_WORD;
377 }
378 
accept(Speller & speller,WordLangTuple const & word)379 void AspellChecker::Private::accept(Speller & speller, WordLangTuple const & word)
380 {
381 	speller.ignored_words_.push_back(word.word());
382 }
383 
384 
385 /// personal word list interface
remove(WordLangTuple const & word)386 void AspellChecker::Private::remove(WordLangTuple const & word)
387 {
388 	PersonalWordList * pd = personal_[word.lang()->lang()];
389 	if (!pd)
390 		return;
391 	pd->remove(word.word());
392 	Spellers::iterator it = spellers_.find(word.lang()->lang());
393 	if (it != spellers_.end()) {
394 		initSessionDictionary(it->second, pd);
395 	}
396 }
397 
398 
insert(WordLangTuple const & word)399 void AspellChecker::Private::insert(WordLangTuple const & word)
400 {
401 	Spellers::iterator it = spellers_.find(word.lang()->lang());
402 	if (it != spellers_.end()) {
403 		addToSession(it->second.e_speller, word.word());
404 		PersonalWordList * pd = personal_[word.lang()->lang()];
405 		if (!pd)
406 			return;
407 		pd->insert(word.word());
408 	}
409 }
410 
learned(WordLangTuple const & word)411 bool AspellChecker::Private::learned(WordLangTuple const & word)
412 {
413 	PersonalWordList * pd = personal_[word.lang()->lang()];
414 	if (!pd)
415 		return false;
416 	return pd->exists(word.word());
417 }
418 
419 
AspellChecker()420 AspellChecker::AspellChecker()
421 	: d(new Private)
422 {}
423 
424 
~AspellChecker()425 AspellChecker::~AspellChecker()
426 {
427 	delete d;
428 }
429 
430 
check(WordLangTuple const & word)431 SpellChecker::Result AspellChecker::check(WordLangTuple const & word)
432 {
433 	AspellSpeller * m = d->speller(word.lang());
434 
435 	if (!m)
436 		return NO_DICTIONARY;
437 
438 	if (word.word().empty())
439 		// MSVC compiled Aspell doesn't like it.
440 		return WORD_OK;
441 
442 	SpellChecker::Result rc = d->check(m, word);
443 	return (rc == WORD_OK && d->learned(word)) ? LEARNED_WORD : rc;
444 }
445 
446 
advanceChangeNumber()447 void AspellChecker::advanceChangeNumber()
448 {
449 	nextChangeNumber();
450 }
451 
452 
insert(WordLangTuple const & word)453 void AspellChecker::insert(WordLangTuple const & word)
454 {
455 	d->insert(word);
456 	advanceChangeNumber();
457 }
458 
459 
accept(WordLangTuple const & word)460 void AspellChecker::accept(WordLangTuple const & word)
461 {
462 	Spellers::iterator it = d->spellers_.find(word.lang()->lang());
463 	if (it != d->spellers_.end()) {
464 		d->addToSession(it->second.e_speller, word.word());
465 		d->accept(it->second, word);
466 		advanceChangeNumber();
467 	}
468 }
469 
470 
suggest(WordLangTuple const & wl,docstring_list & suggestions)471 void AspellChecker::suggest(WordLangTuple const & wl,
472 	docstring_list & suggestions)
473 {
474 	suggestions.clear();
475 	AspellSpeller * m = d->speller(wl.lang());
476 
477 	if (!m)
478 		return;
479 
480 	string const word = d->toAspellWord(wl.word());
481 	AspellWordList const * sugs =
482 		aspell_speller_suggest(m, word.c_str(), -1);
483 	LASSERT(sugs != 0, return);
484 	AspellStringEnumeration * els = aspell_word_list_elements(sugs);
485 	if (!els || aspell_word_list_empty(sugs))
486 		return;
487 
488 	for (;;) {
489 		char const * str = aspell_string_enumeration_next(els);
490 		if (!str)
491 			break;
492 		suggestions.push_back(from_utf8(str));
493 	}
494 
495 	delete_aspell_string_enumeration(els);
496 }
497 
498 
remove(WordLangTuple const & word)499 void AspellChecker::remove(WordLangTuple const & word)
500 {
501 	d->remove(word);
502 	advanceChangeNumber();
503 }
504 
505 
hasDictionary(Language const * lang) const506 bool AspellChecker::hasDictionary(Language const * lang) const
507 {
508 	bool have = false;
509 	Spellers::iterator it = d->spellers_.begin();
510 	Spellers::iterator end = d->spellers_.end();
511 
512 	if (lang) {
513 		for (; it != end && !have; ++it) {
514 			have = it->second.config && d->isValidDictionary(it->second.config, lang->code(), lang->variety());
515 		}
516 		if (!have) {
517 			AspellConfig * config = d->getConfig(lang->code(), lang->variety());
518 			have = d->isValidDictionary(config, lang->code(), lang->variety());
519 			delete_aspell_config(config);
520 		}
521 	}
522 	return have;
523 }
524 
525 
numDictionaries() const526 int AspellChecker::numDictionaries() const
527 {
528 	return d->numDictionaries();
529 }
530 
531 
error()532 docstring const AspellChecker::error()
533 {
534 	Spellers::iterator it = d->spellers_.begin();
535 	Spellers::iterator end = d->spellers_.end();
536 	char const * err = 0;
537 
538 	for (; it != end && 0 == err; ++it) {
539 		if (it->second.e_speller && aspell_error_number(it->second.e_speller) != 0)
540 			err = aspell_error_message(it->second.e_speller);
541 	}
542 
543 	// FIXME UNICODE: err is not in UTF8, but probably the locale encoding
544 	return (err ? from_utf8(err) : docstring());
545 }
546 
547 
548 } // namespace lyx
549