1 /************************************************************************
2 **
3 **  Copyright (C) 2015-2021 Kevin B. Hendricks, Stratford Ontario Canada
4 **  Copyright (C) 2011      John Schember <john@nachtimwald.com>
5 **
6 **  This file is part of Sigil.
7 **
8 **  Sigil is free software: you can redistribute it and/or modify
9 **  it under the terms of the GNU General Public License as published by
10 **  the Free Software Foundation, either version 3 of the License, or
11 **  (at your option) any later version.
12 **
13 **  Sigil is distributed in the hope that it will be useful,
14 **  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 **  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 **  GNU General Public License for more details.
17 **
18 **  You should have received a copy of the GNU General Public License
19 **  along with Sigil.  If not, see <http://www.gnu.org/licenses/>.
20 **
21 *************************************************************************/
22 
23 #include <hunspell.hxx>
24 
25 #include <QCoreApplication>
26 #include <QDir>
27 #include <QFile>
28 #include <QFileInfo>
29 #include <QIODevice>
30 #include <QTextCodec>
31 #include <QTextStream>
32 #include <QUrl>
33 #include <QApplication>
34 #include <QMutex>
35 #include <QMutexLocker>
36 #include <QDebug>
37 
38 #include "Misc/HTMLSpellCheckML.h"
39 #include "Misc/SpellCheck.h"
40 #include "Misc/SettingsStore.h"
41 #include "Misc/Utility.h"
42 #include "sigil_constants.h"
43 
44 #define DBG if(0)
45 
46 #if !defined(Q_OS_WIN32) && !defined(Q_OS_MAC)
47 # include <stdlib.h>
48 #endif
49 
50 SpellCheck *SpellCheck::m_instance = 0;
51 
instance()52 SpellCheck *SpellCheck::instance()
53 {
54     if (m_instance == 0) {
55         m_instance = new SpellCheck();
56     }
57 
58     return m_instance;
59 }
60 
SpellCheck()61 SpellCheck::SpellCheck()
62 {
63     DBG qDebug() << "In SpellCheck Constructor";
64     m_primary.handle = NULL;
65     m_secondary.handle = NULL;
66 
67     // There is a considerable lag involved in loading the Spellcheck dictionaries
68     QApplication::setOverrideCursor(Qt::WaitCursor);
69     loadDictionaryNames();
70     // Create the user dictionary word list directiory if necessary.
71     const QString user_directory = userDictionaryDirectory();
72     QDir userDir(user_directory);
73 
74     if (!userDir.exists()) {
75         userDir.mkpath(user_directory);
76     }
77 
78     // Create the configured file if necessary.
79     QFile userFile(currentUserDictionaryFile());
80 
81     if (!userFile.exists()) {
82         if (userFile.open(QIODevice::WriteOnly)) {
83             userFile.close();
84         }
85     }
86 
87     QApplication::restoreOverrideCursor();
88 
89     UpdateLangCodeToDictMapping();
90 
91     // Load the dictionary the user has selected
92     // now open primary and secondary dictionaries
93     SettingsStore settings;
94     loadDictionary(settings.dictionary());
95     if (!settings.secondary_dictionary().isEmpty()) {
96         loadDictionary(settings.secondary_dictionary());
97     }
98 }
99 
UpdateLangCodeToDictMapping()100 void SpellCheck::UpdateLangCodeToDictMapping()
101 {
102     DBG qDebug() << "In UpdateLangCodeToDictMapping";
103     m_langcode2dict.clear();
104 
105     // create language code to dictionary name mapping
106     foreach(QString dname, m_dictionaries.keys()) {
107         QString lc = dname;
108         lc.replace("_","-");
109         m_langcode2dict[lc] = dname;
110         if (lc.length() > 3) {
111             lc = lc.mid(0,2);
112             m_langcode2dict[lc] = dname;
113         }
114     }
115 
116     // make sure 2 letter mapping equivalent is properly set
117     // for primary and secondary dictionaries
118     // Note: must be done last to overwrite any earlier values
119     SettingsStore settings;
120     QString cd = settings.secondary_dictionary();
121     cd.replace("_","-");
122     if (!cd.isEmpty() && (cd.length() > 3)) {
123         m_langcode2dict[cd.mid(0,2)] = settings.secondary_dictionary();
124     }
125     cd = settings.dictionary();
126     cd.replace("_","-");
127     if (!cd.isEmpty() && (cd.length() > 3)) {
128         m_langcode2dict[cd.mid(0,2)] = settings.dictionary();
129     }
130 }
131 
UnloadDictionary(const QString & dname)132 void SpellCheck::UnloadDictionary(const QString &dname)
133 {
134     DBG qDebug() << "In UnloadDictionary";
135     QMutexLocker locker(&mutex);
136     if (m_opendicts.contains(dname)) {
137         HDictionary hdic = m_opendicts[dname];
138         if (hdic.handle) {
139             delete hdic.handle;
140         }
141         m_opendicts.remove(dname);
142     }
143 }
144 
UnloadAllDictionaries()145 void SpellCheck::UnloadAllDictionaries()
146 {
147     DBG qDebug() << "In UnloadAllDictionaries";
148     foreach(QString name, m_opendicts.keys()) {
149         UnloadDictionary(name);
150     }
151 }
152 
~SpellCheck()153 SpellCheck::~SpellCheck()
154 {
155     DBG qDebug() << "In SpellCheck destructor";
156     UnloadAllDictionaries();
157 
158     if (m_instance) {
159         delete m_instance;
160         m_instance = 0;
161     }
162 }
163 
userDictionaries()164 QStringList SpellCheck::userDictionaries()
165 {
166     DBG qDebug() << "In userDictionaries";
167     // Load the list of user dictionaries.
168     QDir userDictDir(userDictionaryDirectory());
169     QStringList user_dicts = userDictDir.entryList(QDir::Files | QDir::NoDotAndDotDot);
170     user_dicts.sort();
171     return user_dicts;
172 }
173 
dictionaries()174 QStringList SpellCheck::dictionaries()
175 {
176     DBG qDebug() << "In dictionaries";
177     loadDictionaryNames();
178     QStringList dicts;
179     dicts = m_dictionaries.keys();
180     dicts.sort();
181     return dicts;
182 }
183 
currentPrimaryDictionary() const184 QString SpellCheck::currentPrimaryDictionary() const
185 {
186     DBG qDebug() << "In currentPrimaryDictionary";
187     SettingsStore settings;
188     return settings.dictionary();
189 }
190 
spell(const QString & word)191 bool SpellCheck::spell(const QString &word)
192 {
193     DBG qDebug() << "In spell";
194     QString dname = m_langcode2dict.value(HTMLSpellCheckML::langOf(word), "");
195 
196     // if no dictionary exists for this language treat it as correct
197     if (dname.isEmpty()) return true;
198 
199     // if a dictionary exists but is not open yet, open it first
200     if (!m_opendicts.contains(dname)) {
201         loadDictionary(dname);
202     }
203     if (!m_opendicts.contains(dname)) return true;
204     HDictionary hdic = m_opendicts[dname];
205     Q_ASSERT(hdic.codec != nullptr);
206     Q_ASSERT(hdic.handle != nullptr);
207     bool res = hdic.handle->spell(hdic.codec->fromUnicode(Utility::getSpellingSafeText(HTMLSpellCheckML::textOf(word))).constData()) != 0;
208     res = res || isIgnored(HTMLSpellCheckML::textOf(word));
209     return res;
210 }
211 
212 
213 // Speed here is very important as it is invoked by the XHTMLHighlighter2 code
214 // and this is the limiting factor
215 // spell check word without langcode info in Primary and Secondary Dictionaries
spellPS(const QString & word)216 bool SpellCheck::spellPS(const QString &word)
217 {
218     if (!m_primary.handle) return true;
219     if(m_ignoredWords.contains(word)) return true;
220     bool res = m_primary.handle->spell(m_primary.codec->fromUnicode(Utility::getSpellingSafeText(word)).constData()) != 0;
221     if (res) return true;
222     if (!m_secondary.handle) return false;
223     return m_secondary.handle->spell(m_secondary.codec->fromUnicode(Utility::getSpellingSafeText(word)).constData()) != 0;
224 }
225 
226 
suggest(const QString & word)227 QStringList SpellCheck::suggest(const QString &word)
228 {
229     DBG qDebug() << "In suggest";
230     QStringList suggestions;
231     char **suggestedWords;
232     QString dname = m_langcode2dict.value(HTMLSpellCheckML::langOf(word), "");
233     if (dname.isEmpty()) return suggestions;
234     if (!m_opendicts.contains(dname)) return suggestions;
235     HDictionary hdic = m_opendicts[dname];
236     Q_ASSERT(hdic.codec != nullptr);
237     Q_ASSERT(hdic.handle != nullptr);
238     int count = hdic.handle->suggest(&suggestedWords, hdic.codec->fromUnicode(Utility::getSpellingSafeText(HTMLSpellCheckML::textOf(word))).constData());
239 
240     for (int i = 0; i < count; ++i) {
241         suggestions << hdic.codec->toUnicode(suggestedWords[i]);
242     }
243 
244     hdic.handle->free_list(&suggestedWords, count);
245     return suggestions;
246 }
247 
248 
249 // suggesttions for word without langcode using Primary and Secondary Dictionaries
suggestPS(const QString & word)250 QStringList SpellCheck::suggestPS(const QString &word)
251 {
252     QStringList suggestions;
253     char **suggestedWords;
254     char **suggestedWords2;
255     if (!m_primary.handle) return suggestions;
256     int count = m_primary.handle->suggest(&suggestedWords, m_primary.codec->fromUnicode(Utility::getSpellingSafeText(word)).constData());
257     int limit = count;
258     if (limit > 4) limit = 4;
259     for (int i = 0; i < limit; ++i) {
260         suggestions << m_primary.codec->toUnicode(suggestedWords[i]);
261     }
262     m_primary.handle->free_list(&suggestedWords, count);
263     if (!m_secondary.handle) return suggestions;
264     count = m_secondary.handle->suggest(&suggestedWords2, m_secondary.codec->fromUnicode(Utility::getSpellingSafeText(word)).constData());
265     limit = count;
266     if (limit > 4) limit = 4;
267     for (int i = 0; i < limit; ++i) {
268         suggestions << m_secondary.codec->toUnicode(suggestedWords2[i]);
269     }
270     m_secondary.handle->free_list(&suggestedWords2, count);
271     return suggestions;
272 }
273 
274 
clearIgnoredWords()275 void SpellCheck::clearIgnoredWords()
276 {
277     DBG qDebug() << "In clearIgnoredWords";
278     m_ignoredWords.clear();
279 }
280 
281 
ignoreWord(const QString & word)282 void SpellCheck::ignoreWord(const QString &word)
283 {
284     DBG qDebug() << "In ignoreWord";
285     m_ignoredWords.insert(word);
286 }
287 
288 
isIgnored(const QString & word)289 bool SpellCheck::isIgnored(const QString &word) {
290     DBG qDebug() << "In isIgnored";
291     return m_ignoredWords.contains(word);
292 }
293 
294 
addWordToDictionary(const QString & word,const QString & dname)295 void SpellCheck::addWordToDictionary(const QString &word, const QString &dname)
296 {
297     DBG qDebug() << "In addWordToDictionary";
298     if (dname.isEmpty()) return;
299     if (m_opendicts.contains(dname)) {
300         HDictionary hdic = m_opendicts[dname];
301         hdic.handle->add(hdic.codec->fromUnicode(Utility::getSpellingSafeText(HTMLSpellCheckML::textOf(word))).constData());
302     }
303 }
304 
305 
loadDictionary(const QString & dname)306 void SpellCheck::loadDictionary(const QString &dname)
307 {
308     DBG qDebug() << "In loadDictionary: " << dname;
309     QMutexLocker locker(&mutex);
310     // If we don't have a dictionary we cannot continue.
311     if (dname.isEmpty() || !m_dictionaries.contains(dname)) {
312         qDebug() << "attempted to load a non-existent dictionary: " << dname;
313         return;
314     }
315 
316     // Dictionary files to use.
317     QString aff = QString("%1%2.aff").arg(m_dictionaries.value(dname)).arg(dname);
318     QString dic = QString("%1%2.dic").arg(m_dictionaries.value(dname)).arg(dname);
319     QString dic_delta = QString("%1/%2.dic_delta").arg(dictionaryDirectory()).arg(dname);
320     QString alt_dic_delta = QString("%1%2.dic_delta").arg(m_dictionaries.value(dname)).arg(dname);
321     // qDebug() << dic_delta;
322     // qDebug() << alt_dic_delta;
323 
324     // Create a new hunspell object.
325     HDictionary hdic;
326     hdic.name = dname;
327     hdic.handle = new Hunspell(aff.toLocal8Bit().constData(), dic.toLocal8Bit().constData());
328     if (!hdic.handle) {
329         qDebug() << "failed to load new Hunspell dictionary " << dname;
330         return;
331     }
332 
333     // Get the encoding for the text in the dictionary.
334     hdic.codec = QTextCodec::codecForName(hdic.handle->get_dic_encoding());
335     if (hdic.codec == nullptr) {
336         hdic.codec = QTextCodec::codecForName("UTF-8");
337     }
338     if (!hdic.codec) {
339         qDebug() << "failed to load codec " << dname;
340         return;
341     }
342 
343     // Get the extra wordchars used for tokenization
344     hdic.wordchars = hdic.codec->toUnicode(hdic.handle->get_wordchars());
345 
346     // register it as an open dictionary
347     m_opendicts[dname] = hdic;
348 
349     // check for appropriate .dic_delta file and add it
350     // check in user prefs hunspell_dictionaries first
351     // so that user's version is given preference over
352     // any system version
353     QStringList deltaWords;
354     if (QFile(dic_delta).exists()) {
355         dicDeltaWords(dic_delta, deltaWords);
356     } else if (QFile(alt_dic_delta).exists()) {
357         dicDeltaWords(alt_dic_delta, deltaWords);
358     }
359     foreach(QString word, deltaWords){
360         addWordToDictionary(word, dname);
361     }
362 
363     // add UserDictionary words to the Primary Dictionary only
364     if (dname == currentPrimaryDictionary()) {
365         // Load in the words from the user dictionaries.
366         foreach(QString word, allUserDictionaryWords()) {
367             addWordToDictionary(word, dname);
368         }
369     }
370 
371     SettingsStore settings;
372     // store the primary and secondary dictionary info for speed
373     if (dname == settings.dictionary()) {
374         m_primary = hdic;
375     }
376     else if (dname == settings.secondary_dictionary()) {
377         m_secondary = hdic;
378     }
379     return;
380 }
381 
382 
setDictionary(const QString & dname,bool forceReplace)383 void SpellCheck::setDictionary(const QString &dname, bool forceReplace)
384 {
385     DBG qDebug() << "In setDictionary " << dname;
386     // See if we are already using a hunspell object for this language.
387     if (!forceReplace && m_opendicts.contains(dname)) {
388         return;
389     }
390 
391     UnloadDictionary(dname);
392     loadDictionary(dname);
393 }
394 
395 
// Returns the extra word characters recorded for the dictionary belonging to
// language code lang, or for the primary dictionary when lang is empty.  The
// dictionary is opened on demand.  An empty string is returned when no
// dictionary matches or it cannot be opened.
QString SpellCheck::getWordChars(const QString &lang)
{
    DBG qDebug() << "In getWordChars";
    const QString dname = lang.isEmpty() ? currentPrimaryDictionary()
                                         : m_langcode2dict.value(lang, "");

    if (dname.isEmpty()) {
        return "";
    }

    // A known dictionary that is not open yet gets opened first; if that
    // does not register it, give up.
    if (!m_opendicts.contains(dname)) {
        loadDictionary(dname);
        if (!m_opendicts.contains(dname)) {
            return "";
        }
    }

    HDictionary hdic = m_opendicts[dname];
    Q_ASSERT(hdic.codec != nullptr);
    Q_ASSERT(hdic.handle != nullptr);
    return hdic.wordchars;
}
418 
419 
addToUserDictionary(const QString & word,QString dict_name)420 void SpellCheck::addToUserDictionary(const QString &word, QString dict_name)
421 {
422     DBG qDebug() << "In AddToUserDictionary";
423     // Adding to the user dictionary also marks the word as a correct spelling.
424     if (word.isEmpty()) {
425         return;
426     }
427 
428     SettingsStore settings;
429     if (dict_name.isEmpty()) {
430         dict_name = settings.defaultUserDictionary();
431     }
432 
433     // Add the word only if the dictionary is enabled
434     if (settings.enabledUserDictionaries().contains(dict_name)) {
435         addWordToDictionary(word, currentPrimaryDictionary());
436     }
437 
438     if (!userDictionaryWords(dict_name).contains(word)) {
439         const QString userDict = userDictionaryFile(dict_name);
440         QFile userDictFile(userDict);
441 
442         if (!userDictFile.exists()) {
443             // Try to create the path in case it does not exist.
444             QDir().mkpath(QFileInfo(userDict).absolutePath());
445         }
446 
447         // Try to open the file to add the word.
448         if (userDictFile.open(QIODevice::Append)) {
449             QTextStream userDictStream(&userDictFile);
450             userDictStream.setCodec("UTF-8");
451             userDictStream << word << "\n";
452             userDictFile.close();
453         }
454     }
455 }
456 
allUserDictionaryWords()457 QStringList SpellCheck::allUserDictionaryWords()
458 {
459     DBG qDebug() << "In allUserDictionaryWords";
460     QStringList userWords;
461     SettingsStore settings;
462     foreach (QString dict_name, settings.enabledUserDictionaries()) {
463         userWords.append(userDictionaryWords(dict_name));
464     }
465     return userWords;
466 }
467 
userDictionaryWords(QString dict_name)468 QStringList SpellCheck::userDictionaryWords(QString dict_name)
469 {
470     DBG qDebug() << "In userDictionaryWords";
471     QStringList userWords;
472     // Read each word from the user dictionary.
473 
474     QFile userDictFile(userDictionaryFile(dict_name));
475 
476     if (userDictFile.open(QIODevice::ReadOnly)) {
477         QTextStream userDictStream(&userDictFile);
478         userDictStream.setCodec("UTF-8");
479         QString line;
480 
481         do {
482             line = userDictStream.readLine();
483 
484             if (!line.isEmpty()) {
485                 userWords << line;
486             }
487         } while (!line.isNull());
488 
489         userDictFile.close();
490     }
491 
492     userWords.sort();
493     return userWords;
494 }
495 
496 
dicDeltaWords(const QString & delta_path,QStringList & word_list)497 void SpellCheck::dicDeltaWords(const QString &delta_path, QStringList & word_list)
498 {
499     DBG qDebug() << "In dicDeltaWords";
500     QFile deltaFile(delta_path);
501     if (deltaFile.open(QIODevice::ReadOnly)) {
502         QTextStream deltaStream(&deltaFile);
503         deltaStream.setCodec("UTF-8");
504         QString line;
505         do {
506             line = deltaStream.readLine();
507             if (!line.isEmpty()) {
508                 word_list << line;
509             }
510         } while (!line.isNull());
511         deltaFile.close();
512     }
513     return;
514 }
515 
516 
loadDictionaryNames()517 void SpellCheck::loadDictionaryNames()
518 {
519     DBG qDebug() << "In loadDictionaryNames";
520     QStringList dictExts;
521     dictExts << ".aff"
522              << ".dic";
523     m_dictionaries.clear();
524     const QString user_directory = dictionaryDirectory();
525     QDir userDir(user_directory);
526 
527     // Create the user dictionary directory if it does not exist.
528     if (!userDir.exists()) {
529         userDir.mkpath(user_directory);
530     }
531 
532     // Paths for each dictionary location.
533     QStringList paths;
534 #ifdef Q_OS_MAC
535     paths << QCoreApplication::applicationDirPath() + "/../hunspell_dictionaries";
536 #elif defined(Q_OS_WIN32)
537     paths << QCoreApplication::applicationDirPath() + "/hunspell_dictionaries";
538 #elif !defined(Q_OS_WIN32) && !defined(Q_OS_MAC)
539     paths << Utility::LinuxHunspellDictionaryDirs();
540 #endif
541     // Add the user dictionary directory last because anything in here
542     // will override installation supplied dictionaries.
543     paths << user_directory;
544     foreach(QString path, paths) {
545         // Find all dictionaries and add them to the avaliable list.
546         QDir dictDir(path);
547 
548         if (dictDir.exists()) {
549             QStringList filters;
550             // Look for all .dic files.
551             filters << "*.dic";
552             dictDir.setNameFilters(filters);
553             QStringList otherDicts = dictDir.entryList();
554             foreach(QString ud, otherDicts) {
555                 const QFileInfo fileInfo(ud);
556                 const QString basename = fileInfo.baseName();
557                 const QString udPath = path + "/";
558 
559                 // We only include the dictionary if it has a corresponding .aff.
560                 if (QFile(udPath + basename + ".aff").exists()) {
561                     m_dictionaries.insert(basename, udPath);
562                 }
563             }
564         }
565     }
566 }
567 
dictionaryDirectory()568 QString SpellCheck::dictionaryDirectory()
569 {
570     DBG qDebug() << "In dictionaryDirectory";
571     return Utility::DefinePrefsDir() + "/hunspell_dictionaries";
572 }
573 
userDictionaryDirectory()574 QString SpellCheck::userDictionaryDirectory()
575 {
576     DBG qDebug() << "In userDictionaryDirectory";
577     return Utility::DefinePrefsDir() + "/user_dictionaries";
578 }
579 
currentUserDictionaryFile()580 QString SpellCheck::currentUserDictionaryFile()
581 {
582     DBG qDebug() << "In currentUserDictionaryFile";
583     SettingsStore settings;
584     return userDictionaryDirectory() + "/" + settings.defaultUserDictionary();
585 }
586 
userDictionaryFile(QString dict_name)587 QString SpellCheck::userDictionaryFile(QString dict_name)
588 {
589     DBG qDebug() << "In userDictionaryFile";
590     return userDictionaryDirectory() + "/" + dict_name;
591 }
592