1 /************************************************************************
2 **
3 ** Copyright (C) 2015-2021 Kevin B. Hendricks, Stratford Ontario Canada
4 ** Copyright (C) 2011 John Schember <john@nachtimwald.com>
5 **
6 ** This file is part of Sigil.
7 **
8 ** Sigil is free software: you can redistribute it and/or modify
9 ** it under the terms of the GNU General Public License as published by
10 ** the Free Software Foundation, either version 3 of the License, or
11 ** (at your option) any later version.
12 **
13 ** Sigil is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with Sigil. If not, see <http://www.gnu.org/licenses/>.
20 **
21 *************************************************************************/
22
23 #include <hunspell.hxx>
24
25 #include <QCoreApplication>
26 #include <QDir>
27 #include <QFile>
28 #include <QFileInfo>
29 #include <QIODevice>
30 #include <QTextCodec>
31 #include <QTextStream>
32 #include <QUrl>
33 #include <QApplication>
34 #include <QMutex>
35 #include <QMutexLocker>
36 #include <QDebug>
37
38 #include "Misc/HTMLSpellCheckML.h"
39 #include "Misc/SpellCheck.h"
40 #include "Misc/SettingsStore.h"
41 #include "Misc/Utility.h"
42 #include "sigil_constants.h"
43
44 #define DBG if(0)
45
46 #if !defined(Q_OS_WIN32) && !defined(Q_OS_MAC)
47 # include <stdlib.h>
48 #endif
49
50 SpellCheck *SpellCheck::m_instance = 0;
51
instance()52 SpellCheck *SpellCheck::instance()
53 {
54 if (m_instance == 0) {
55 m_instance = new SpellCheck();
56 }
57
58 return m_instance;
59 }
60
SpellCheck()61 SpellCheck::SpellCheck()
62 {
63 DBG qDebug() << "In SpellCheck Constructor";
64 m_primary.handle = NULL;
65 m_secondary.handle = NULL;
66
67 // There is a considerable lag involved in loading the Spellcheck dictionaries
68 QApplication::setOverrideCursor(Qt::WaitCursor);
69 loadDictionaryNames();
70 // Create the user dictionary word list directiory if necessary.
71 const QString user_directory = userDictionaryDirectory();
72 QDir userDir(user_directory);
73
74 if (!userDir.exists()) {
75 userDir.mkpath(user_directory);
76 }
77
78 // Create the configured file if necessary.
79 QFile userFile(currentUserDictionaryFile());
80
81 if (!userFile.exists()) {
82 if (userFile.open(QIODevice::WriteOnly)) {
83 userFile.close();
84 }
85 }
86
87 QApplication::restoreOverrideCursor();
88
89 UpdateLangCodeToDictMapping();
90
91 // Load the dictionary the user has selected
92 // now open primary and secondary dictionaries
93 SettingsStore settings;
94 loadDictionary(settings.dictionary());
95 if (!settings.secondary_dictionary().isEmpty()) {
96 loadDictionary(settings.secondary_dictionary());
97 }
98 }
99
UpdateLangCodeToDictMapping()100 void SpellCheck::UpdateLangCodeToDictMapping()
101 {
102 DBG qDebug() << "In UpdateLangCodeToDictMapping";
103 m_langcode2dict.clear();
104
105 // create language code to dictionary name mapping
106 foreach(QString dname, m_dictionaries.keys()) {
107 QString lc = dname;
108 lc.replace("_","-");
109 m_langcode2dict[lc] = dname;
110 if (lc.length() > 3) {
111 lc = lc.mid(0,2);
112 m_langcode2dict[lc] = dname;
113 }
114 }
115
116 // make sure 2 letter mapping equivalent is properly set
117 // for primary and secondary dictionaries
118 // Note: must be done last to overwrite any earlier values
119 SettingsStore settings;
120 QString cd = settings.secondary_dictionary();
121 cd.replace("_","-");
122 if (!cd.isEmpty() && (cd.length() > 3)) {
123 m_langcode2dict[cd.mid(0,2)] = settings.secondary_dictionary();
124 }
125 cd = settings.dictionary();
126 cd.replace("_","-");
127 if (!cd.isEmpty() && (cd.length() > 3)) {
128 m_langcode2dict[cd.mid(0,2)] = settings.dictionary();
129 }
130 }
131
UnloadDictionary(const QString & dname)132 void SpellCheck::UnloadDictionary(const QString &dname)
133 {
134 DBG qDebug() << "In UnloadDictionary";
135 QMutexLocker locker(&mutex);
136 if (m_opendicts.contains(dname)) {
137 HDictionary hdic = m_opendicts[dname];
138 if (hdic.handle) {
139 delete hdic.handle;
140 }
141 m_opendicts.remove(dname);
142 }
143 }
144
UnloadAllDictionaries()145 void SpellCheck::UnloadAllDictionaries()
146 {
147 DBG qDebug() << "In UnloadAllDictionaries";
148 foreach(QString name, m_opendicts.keys()) {
149 UnloadDictionary(name);
150 }
151 }
152
~SpellCheck()153 SpellCheck::~SpellCheck()
154 {
155 DBG qDebug() << "In SpellCheck destructor";
156 UnloadAllDictionaries();
157
158 if (m_instance) {
159 delete m_instance;
160 m_instance = 0;
161 }
162 }
163
userDictionaries()164 QStringList SpellCheck::userDictionaries()
165 {
166 DBG qDebug() << "In userDictionaries";
167 // Load the list of user dictionaries.
168 QDir userDictDir(userDictionaryDirectory());
169 QStringList user_dicts = userDictDir.entryList(QDir::Files | QDir::NoDotAndDotDot);
170 user_dicts.sort();
171 return user_dicts;
172 }
173
dictionaries()174 QStringList SpellCheck::dictionaries()
175 {
176 DBG qDebug() << "In dictionaries";
177 loadDictionaryNames();
178 QStringList dicts;
179 dicts = m_dictionaries.keys();
180 dicts.sort();
181 return dicts;
182 }
183
currentPrimaryDictionary() const184 QString SpellCheck::currentPrimaryDictionary() const
185 {
186 DBG qDebug() << "In currentPrimaryDictionary";
187 SettingsStore settings;
188 return settings.dictionary();
189 }
190
spell(const QString & word)191 bool SpellCheck::spell(const QString &word)
192 {
193 DBG qDebug() << "In spell";
194 QString dname = m_langcode2dict.value(HTMLSpellCheckML::langOf(word), "");
195
196 // if no dictionary exists for this language treat it as correct
197 if (dname.isEmpty()) return true;
198
199 // if a dictionary exists but is not open yet, open it first
200 if (!m_opendicts.contains(dname)) {
201 loadDictionary(dname);
202 }
203 if (!m_opendicts.contains(dname)) return true;
204 HDictionary hdic = m_opendicts[dname];
205 Q_ASSERT(hdic.codec != nullptr);
206 Q_ASSERT(hdic.handle != nullptr);
207 bool res = hdic.handle->spell(hdic.codec->fromUnicode(Utility::getSpellingSafeText(HTMLSpellCheckML::textOf(word))).constData()) != 0;
208 res = res || isIgnored(HTMLSpellCheckML::textOf(word));
209 return res;
210 }
211
212
213 // Speed here is very important as it is invoked by the XHTMLHighlighter2 code
214 // and this is the limiting factor
215 // spell check word without langcode info in Primary and Secondary Dictionaries
spellPS(const QString & word)216 bool SpellCheck::spellPS(const QString &word)
217 {
218 if (!m_primary.handle) return true;
219 if(m_ignoredWords.contains(word)) return true;
220 bool res = m_primary.handle->spell(m_primary.codec->fromUnicode(Utility::getSpellingSafeText(word)).constData()) != 0;
221 if (res) return true;
222 if (!m_secondary.handle) return false;
223 return m_secondary.handle->spell(m_secondary.codec->fromUnicode(Utility::getSpellingSafeText(word)).constData()) != 0;
224 }
225
226
suggest(const QString & word)227 QStringList SpellCheck::suggest(const QString &word)
228 {
229 DBG qDebug() << "In suggest";
230 QStringList suggestions;
231 char **suggestedWords;
232 QString dname = m_langcode2dict.value(HTMLSpellCheckML::langOf(word), "");
233 if (dname.isEmpty()) return suggestions;
234 if (!m_opendicts.contains(dname)) return suggestions;
235 HDictionary hdic = m_opendicts[dname];
236 Q_ASSERT(hdic.codec != nullptr);
237 Q_ASSERT(hdic.handle != nullptr);
238 int count = hdic.handle->suggest(&suggestedWords, hdic.codec->fromUnicode(Utility::getSpellingSafeText(HTMLSpellCheckML::textOf(word))).constData());
239
240 for (int i = 0; i < count; ++i) {
241 suggestions << hdic.codec->toUnicode(suggestedWords[i]);
242 }
243
244 hdic.handle->free_list(&suggestedWords, count);
245 return suggestions;
246 }
247
248
249 // suggesttions for word without langcode using Primary and Secondary Dictionaries
suggestPS(const QString & word)250 QStringList SpellCheck::suggestPS(const QString &word)
251 {
252 QStringList suggestions;
253 char **suggestedWords;
254 char **suggestedWords2;
255 if (!m_primary.handle) return suggestions;
256 int count = m_primary.handle->suggest(&suggestedWords, m_primary.codec->fromUnicode(Utility::getSpellingSafeText(word)).constData());
257 int limit = count;
258 if (limit > 4) limit = 4;
259 for (int i = 0; i < limit; ++i) {
260 suggestions << m_primary.codec->toUnicode(suggestedWords[i]);
261 }
262 m_primary.handle->free_list(&suggestedWords, count);
263 if (!m_secondary.handle) return suggestions;
264 count = m_secondary.handle->suggest(&suggestedWords2, m_secondary.codec->fromUnicode(Utility::getSpellingSafeText(word)).constData());
265 limit = count;
266 if (limit > 4) limit = 4;
267 for (int i = 0; i < limit; ++i) {
268 suggestions << m_secondary.codec->toUnicode(suggestedWords2[i]);
269 }
270 m_secondary.handle->free_list(&suggestedWords2, count);
271 return suggestions;
272 }
273
274
clearIgnoredWords()275 void SpellCheck::clearIgnoredWords()
276 {
277 DBG qDebug() << "In clearIgnoredWords";
278 m_ignoredWords.clear();
279 }
280
281
ignoreWord(const QString & word)282 void SpellCheck::ignoreWord(const QString &word)
283 {
284 DBG qDebug() << "In ignoreWord";
285 m_ignoredWords.insert(word);
286 }
287
288
isIgnored(const QString & word)289 bool SpellCheck::isIgnored(const QString &word) {
290 DBG qDebug() << "In isIgnored";
291 return m_ignoredWords.contains(word);
292 }
293
294
addWordToDictionary(const QString & word,const QString & dname)295 void SpellCheck::addWordToDictionary(const QString &word, const QString &dname)
296 {
297 DBG qDebug() << "In addWordToDictionary";
298 if (dname.isEmpty()) return;
299 if (m_opendicts.contains(dname)) {
300 HDictionary hdic = m_opendicts[dname];
301 hdic.handle->add(hdic.codec->fromUnicode(Utility::getSpellingSafeText(HTMLSpellCheckML::textOf(word))).constData());
302 }
303 }
304
305
loadDictionary(const QString & dname)306 void SpellCheck::loadDictionary(const QString &dname)
307 {
308 DBG qDebug() << "In loadDictionary: " << dname;
309 QMutexLocker locker(&mutex);
310 // If we don't have a dictionary we cannot continue.
311 if (dname.isEmpty() || !m_dictionaries.contains(dname)) {
312 qDebug() << "attempted to load a non-existent dictionary: " << dname;
313 return;
314 }
315
316 // Dictionary files to use.
317 QString aff = QString("%1%2.aff").arg(m_dictionaries.value(dname)).arg(dname);
318 QString dic = QString("%1%2.dic").arg(m_dictionaries.value(dname)).arg(dname);
319 QString dic_delta = QString("%1/%2.dic_delta").arg(dictionaryDirectory()).arg(dname);
320 QString alt_dic_delta = QString("%1%2.dic_delta").arg(m_dictionaries.value(dname)).arg(dname);
321 // qDebug() << dic_delta;
322 // qDebug() << alt_dic_delta;
323
324 // Create a new hunspell object.
325 HDictionary hdic;
326 hdic.name = dname;
327 hdic.handle = new Hunspell(aff.toLocal8Bit().constData(), dic.toLocal8Bit().constData());
328 if (!hdic.handle) {
329 qDebug() << "failed to load new Hunspell dictionary " << dname;
330 return;
331 }
332
333 // Get the encoding for the text in the dictionary.
334 hdic.codec = QTextCodec::codecForName(hdic.handle->get_dic_encoding());
335 if (hdic.codec == nullptr) {
336 hdic.codec = QTextCodec::codecForName("UTF-8");
337 }
338 if (!hdic.codec) {
339 qDebug() << "failed to load codec " << dname;
340 return;
341 }
342
343 // Get the extra wordchars used for tokenization
344 hdic.wordchars = hdic.codec->toUnicode(hdic.handle->get_wordchars());
345
346 // register it as an open dictionary
347 m_opendicts[dname] = hdic;
348
349 // check for appropriate .dic_delta file and add it
350 // check in user prefs hunspell_dictionaries first
351 // so that user's version is given preference over
352 // any system version
353 QStringList deltaWords;
354 if (QFile(dic_delta).exists()) {
355 dicDeltaWords(dic_delta, deltaWords);
356 } else if (QFile(alt_dic_delta).exists()) {
357 dicDeltaWords(alt_dic_delta, deltaWords);
358 }
359 foreach(QString word, deltaWords){
360 addWordToDictionary(word, dname);
361 }
362
363 // add UserDictionary words to the Primary Dictionary only
364 if (dname == currentPrimaryDictionary()) {
365 // Load in the words from the user dictionaries.
366 foreach(QString word, allUserDictionaryWords()) {
367 addWordToDictionary(word, dname);
368 }
369 }
370
371 SettingsStore settings;
372 // store the primary and secondary dictionary info for speed
373 if (dname == settings.dictionary()) {
374 m_primary = hdic;
375 }
376 else if (dname == settings.secondary_dictionary()) {
377 m_secondary = hdic;
378 }
379 return;
380 }
381
382
setDictionary(const QString & dname,bool forceReplace)383 void SpellCheck::setDictionary(const QString &dname, bool forceReplace)
384 {
385 DBG qDebug() << "In setDictionary " << dname;
386 // See if we are already using a hunspell object for this language.
387 if (!forceReplace && m_opendicts.contains(dname)) {
388 return;
389 }
390
391 UnloadDictionary(dname);
392 loadDictionary(dname);
393 }
394
395
// Returns the extra word characters recorded for the dictionary mapped to lang,
// or for the primary dictionary when lang is empty. The dictionary is opened on
// demand; an empty string is returned when no dictionary is mapped to lang or
// it cannot be opened.
QString SpellCheck::getWordChars(const QString &lang)
{
    DBG qDebug() << "In getWordChars";
    const QString dict_name = lang.isEmpty() ? currentPrimaryDictionary()
                                             : m_langcode2dict.value(lang, "");
    if (dict_name.isEmpty()) {
        return "";
    }

    // if a dictionary exists but is not open yet, open it first
    if (!m_opendicts.contains(dict_name)) {
        loadDictionary(dict_name);
        if (!m_opendicts.contains(dict_name)) {
            return "";
        }
    }

    HDictionary dict = m_opendicts[dict_name];
    Q_ASSERT(dict.codec != nullptr);
    Q_ASSERT(dict.handle != nullptr);
    return dict.wordchars;
}
418
419
addToUserDictionary(const QString & word,QString dict_name)420 void SpellCheck::addToUserDictionary(const QString &word, QString dict_name)
421 {
422 DBG qDebug() << "In AddToUserDictionary";
423 // Adding to the user dictionary also marks the word as a correct spelling.
424 if (word.isEmpty()) {
425 return;
426 }
427
428 SettingsStore settings;
429 if (dict_name.isEmpty()) {
430 dict_name = settings.defaultUserDictionary();
431 }
432
433 // Add the word only if the dictionary is enabled
434 if (settings.enabledUserDictionaries().contains(dict_name)) {
435 addWordToDictionary(word, currentPrimaryDictionary());
436 }
437
438 if (!userDictionaryWords(dict_name).contains(word)) {
439 const QString userDict = userDictionaryFile(dict_name);
440 QFile userDictFile(userDict);
441
442 if (!userDictFile.exists()) {
443 // Try to create the path in case it does not exist.
444 QDir().mkpath(QFileInfo(userDict).absolutePath());
445 }
446
447 // Try to open the file to add the word.
448 if (userDictFile.open(QIODevice::Append)) {
449 QTextStream userDictStream(&userDictFile);
450 userDictStream.setCodec("UTF-8");
451 userDictStream << word << "\n";
452 userDictFile.close();
453 }
454 }
455 }
456
allUserDictionaryWords()457 QStringList SpellCheck::allUserDictionaryWords()
458 {
459 DBG qDebug() << "In allUserDictionaryWords";
460 QStringList userWords;
461 SettingsStore settings;
462 foreach (QString dict_name, settings.enabledUserDictionaries()) {
463 userWords.append(userDictionaryWords(dict_name));
464 }
465 return userWords;
466 }
467
userDictionaryWords(QString dict_name)468 QStringList SpellCheck::userDictionaryWords(QString dict_name)
469 {
470 DBG qDebug() << "In userDictionaryWords";
471 QStringList userWords;
472 // Read each word from the user dictionary.
473
474 QFile userDictFile(userDictionaryFile(dict_name));
475
476 if (userDictFile.open(QIODevice::ReadOnly)) {
477 QTextStream userDictStream(&userDictFile);
478 userDictStream.setCodec("UTF-8");
479 QString line;
480
481 do {
482 line = userDictStream.readLine();
483
484 if (!line.isEmpty()) {
485 userWords << line;
486 }
487 } while (!line.isNull());
488
489 userDictFile.close();
490 }
491
492 userWords.sort();
493 return userWords;
494 }
495
496
dicDeltaWords(const QString & delta_path,QStringList & word_list)497 void SpellCheck::dicDeltaWords(const QString &delta_path, QStringList & word_list)
498 {
499 DBG qDebug() << "In dicDeltaWords";
500 QFile deltaFile(delta_path);
501 if (deltaFile.open(QIODevice::ReadOnly)) {
502 QTextStream deltaStream(&deltaFile);
503 deltaStream.setCodec("UTF-8");
504 QString line;
505 do {
506 line = deltaStream.readLine();
507 if (!line.isEmpty()) {
508 word_list << line;
509 }
510 } while (!line.isNull());
511 deltaFile.close();
512 }
513 return;
514 }
515
516
loadDictionaryNames()517 void SpellCheck::loadDictionaryNames()
518 {
519 DBG qDebug() << "In loadDictionaryNames";
520 QStringList dictExts;
521 dictExts << ".aff"
522 << ".dic";
523 m_dictionaries.clear();
524 const QString user_directory = dictionaryDirectory();
525 QDir userDir(user_directory);
526
527 // Create the user dictionary directory if it does not exist.
528 if (!userDir.exists()) {
529 userDir.mkpath(user_directory);
530 }
531
532 // Paths for each dictionary location.
533 QStringList paths;
534 #ifdef Q_OS_MAC
535 paths << QCoreApplication::applicationDirPath() + "/../hunspell_dictionaries";
536 #elif defined(Q_OS_WIN32)
537 paths << QCoreApplication::applicationDirPath() + "/hunspell_dictionaries";
538 #elif !defined(Q_OS_WIN32) && !defined(Q_OS_MAC)
539 paths << Utility::LinuxHunspellDictionaryDirs();
540 #endif
541 // Add the user dictionary directory last because anything in here
542 // will override installation supplied dictionaries.
543 paths << user_directory;
544 foreach(QString path, paths) {
545 // Find all dictionaries and add them to the avaliable list.
546 QDir dictDir(path);
547
548 if (dictDir.exists()) {
549 QStringList filters;
550 // Look for all .dic files.
551 filters << "*.dic";
552 dictDir.setNameFilters(filters);
553 QStringList otherDicts = dictDir.entryList();
554 foreach(QString ud, otherDicts) {
555 const QFileInfo fileInfo(ud);
556 const QString basename = fileInfo.baseName();
557 const QString udPath = path + "/";
558
559 // We only include the dictionary if it has a corresponding .aff.
560 if (QFile(udPath + basename + ".aff").exists()) {
561 m_dictionaries.insert(basename, udPath);
562 }
563 }
564 }
565 }
566 }
567
dictionaryDirectory()568 QString SpellCheck::dictionaryDirectory()
569 {
570 DBG qDebug() << "In dictionaryDirectory";
571 return Utility::DefinePrefsDir() + "/hunspell_dictionaries";
572 }
573
userDictionaryDirectory()574 QString SpellCheck::userDictionaryDirectory()
575 {
576 DBG qDebug() << "In userDictionaryDirectory";
577 return Utility::DefinePrefsDir() + "/user_dictionaries";
578 }
579
currentUserDictionaryFile()580 QString SpellCheck::currentUserDictionaryFile()
581 {
582 DBG qDebug() << "In currentUserDictionaryFile";
583 SettingsStore settings;
584 return userDictionaryDirectory() + "/" + settings.defaultUserDictionary();
585 }
586
userDictionaryFile(QString dict_name)587 QString SpellCheck::userDictionaryFile(QString dict_name)
588 {
589 DBG qDebug() << "In userDictionaryFile";
590 return userDictionaryDirectory() + "/" + dict_name;
591 }
592