1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the Qt Assistant of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #include "qhelpsearchindexwriter_default_p.h"
41 #include "qhelp_global.h"
42 #include "qhelpenginecore.h"
43 #include "qhelpdbreader_p.h"
44 
45 #include <QtCore/QDataStream>
46 #include <QtCore/QDateTime>
47 #include <QtCore/QDir>
48 #include <QtCore/QTextCodec>
49 #include <QtCore/QTextStream>
50 #include <QtCore/QSet>
51 #include <QtCore/QUrl>
52 #include <QtCore/QVariant>
53 #include <QtSql/QSqlDatabase>
54 #include <QtSql/QSqlDriver>
55 #include <QtSql/QSqlError>
56 #include <QtSql/QSqlQuery>
57 
58 #include <QTextDocument>
59 
60 QT_BEGIN_NAMESPACE
61 
62 namespace fulltextsearch {
63 namespace qt {
64 
// File name of the SQLite full text search database inside the index directory.
const char FTS_DB_NAME[] = "fts";
66 
Writer(const QString & path)67 Writer::Writer(const QString &path)
68     : m_dbDir(path)
69 {
70     clearLegacyIndex();
71     QDir().mkpath(m_dbDir);
72     m_uniqueId = QHelpGlobal::uniquifyConnectionName(QLatin1String("QHelpWriter"), this);
73     m_db = new QSqlDatabase();
74     *m_db = QSqlDatabase::addDatabase(QLatin1String("QSQLITE"), m_uniqueId);
75     const QString dbPath = m_dbDir + QLatin1Char('/') + QLatin1String(FTS_DB_NAME);
76     m_db->setDatabaseName(dbPath);
77     if (!m_db->open()) {
78         const QString &error = QHelpSearchIndexWriter::tr("Cannot open database \"%1\" using connection \"%2\": %3")
79                 .arg(dbPath, m_uniqueId, m_db->lastError().text());
80         qWarning("%s", qUtf8Printable(error));
81         delete m_db;
82         m_db = nullptr;
83         QSqlDatabase::removeDatabase(m_uniqueId);
84         m_uniqueId = QString();
85     } else {
86         startTransaction();
87     }
88 }
89 
tryInit(bool reindex)90 bool Writer::tryInit(bool reindex)
91 {
92     if (!m_db)
93         return true;
94 
95     QSqlQuery query(*m_db);
96     // HACK: we try to perform any modifying command just to check if
97     // we don't get SQLITE_BUSY code (SQLITE_BUSY is defined to 5 in sqlite driver)
98     if (!query.exec(QLatin1String("CREATE TABLE foo ();"))) {
99         if (query.lastError().nativeErrorCode() == QLatin1String("5")) // db is locked
100             return false;
101     }
102     // HACK: clear what we have created
103     query.exec(QLatin1String("DROP TABLE foo;"));
104 
105     init(reindex);
106     return true;
107 }
108 
hasDB()109 bool Writer::hasDB()
110 {
111     if (!m_db)
112         return false;
113 
114     QSqlQuery query(*m_db);
115 
116     query.prepare(QLatin1String("SELECT id FROM info LIMIT 1"));
117     query.exec();
118 
119     return query.next();
120 }
121 
clearLegacyIndex()122 void Writer::clearLegacyIndex()
123 {
124     // Clear old legacy clucene index.
125     // More important in case of Creator, since
126     // the index folder is common for all Creator versions
127     QDir dir(m_dbDir);
128     if (!dir.exists())
129         return;
130 
131     const QStringList &list = dir.entryList(QDir::Files | QDir::Hidden);
132     if (!list.contains(QLatin1String(FTS_DB_NAME))) {
133         for (const QString &item : list)
134             dir.remove(item);
135     }
136 }
137 
init(bool reindex)138 void Writer::init(bool reindex)
139 {
140     if (!m_db)
141         return;
142 
143     QSqlQuery query(*m_db);
144 
145     if (reindex && hasDB()) {
146         m_needOptimize = true;
147 
148         query.exec(QLatin1String("DROP TABLE titles;"));
149         query.exec(QLatin1String("DROP TABLE contents;"));
150         query.exec(QLatin1String("DROP TABLE info;"));
151     }
152 
153     query.exec(QLatin1String("CREATE TABLE info (id INTEGER PRIMARY KEY, namespace, attributes, url, title, data);"));
154 
155     query.exec(QLatin1String("CREATE VIRTUAL TABLE titles USING fts5("
156                              "namespace UNINDEXED, attributes UNINDEXED, "
157                              "url UNINDEXED, title, "
158                              "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"));
159     query.exec(QLatin1String("CREATE TRIGGER titles_insert AFTER INSERT ON info BEGIN "
160                              "INSERT INTO titles(rowid, namespace, attributes, url, title) "
161                              "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); "
162                              "END;"));
163     query.exec(QLatin1String("CREATE TRIGGER titles_delete AFTER DELETE ON info BEGIN "
164                              "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) "
165                              "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); "
166                              "END;"));
167     query.exec(QLatin1String("CREATE TRIGGER titles_update AFTER UPDATE ON info BEGIN "
168                              "INSERT INTO titles(titles, rowid, namespace, attributes, url, title) "
169                              "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title); "
170                              "INSERT INTO titles(rowid, namespace, attributes, url, title) "
171                              "VALUES(new.id, new.namespace, new.attributes, new.url, new.title); "
172                              "END;"));
173 
174     query.exec(QLatin1String("CREATE VIRTUAL TABLE contents USING fts5("
175                              "namespace UNINDEXED, attributes UNINDEXED, "
176                              "url UNINDEXED, title, data, "
177                              "tokenize = 'porter unicode61', content = 'info', content_rowid='id');"));
178     query.exec(QLatin1String("CREATE TRIGGER contents_insert AFTER INSERT ON info BEGIN "
179                              "INSERT INTO contents(rowid, namespace, attributes, url, title, data) "
180                              "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); "
181                              "END;"));
182     query.exec(QLatin1String("CREATE TRIGGER contents_delete AFTER DELETE ON info BEGIN "
183                              "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) "
184                              "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); "
185                              "END;"));
186     query.exec(QLatin1String("CREATE TRIGGER contents_update AFTER UPDATE ON info BEGIN "
187                              "INSERT INTO contents(contents, rowid, namespace, attributes, url, title, data) "
188                              "VALUES('delete', old.id, old.namespace, old.attributes, old.url, old.title, old.data); "
189                              "INSERT INTO contents(rowid, namespace, attributes, url, title, data) "
190                              "VALUES(new.id, new.namespace, new.attributes, new.url, new.title, new.data); "
191                              "END;"));
192 }
193 
~Writer()194 Writer::~Writer()
195 {
196     if (m_db) {
197         m_db->close();
198         delete m_db;
199     }
200 
201     if (!m_uniqueId.isEmpty())
202         QSqlDatabase::removeDatabase(m_uniqueId);
203 }
204 
flush()205 void Writer::flush()
206 {
207     if (!m_db)
208         return;
209 
210     QSqlQuery query(*m_db);
211 
212     query.prepare(QLatin1String("INSERT INTO info (namespace, attributes, url, title, data) VALUES (?, ?, ?, ?, ?)"));
213     query.addBindValue(m_namespaces);
214     query.addBindValue(m_attributes);
215     query.addBindValue(m_urls);
216     query.addBindValue(m_titles);
217     query.addBindValue(m_contents);
218     query.execBatch();
219 
220     m_namespaces = QVariantList();
221     m_attributes = QVariantList();
222     m_urls = QVariantList();
223     m_titles = QVariantList();
224     m_contents = QVariantList();
225 }
226 
removeNamespace(const QString & namespaceName)227 void Writer::removeNamespace(const QString &namespaceName)
228 {
229     if (!m_db)
230         return;
231 
232     if (!hasNamespace(namespaceName))
233         return; // no data to delete
234 
235     m_needOptimize = true;
236 
237     QSqlQuery query(*m_db);
238 
239     query.prepare(QLatin1String("DELETE FROM info WHERE namespace = ?"));
240     query.addBindValue(namespaceName);
241     query.exec();
242 }
243 
hasNamespace(const QString & namespaceName)244 bool Writer::hasNamespace(const QString &namespaceName)
245 {
246     if (!m_db)
247         return false;
248 
249     QSqlQuery query(*m_db);
250 
251     query.prepare(QLatin1String("SELECT id FROM info WHERE namespace = ? LIMIT 1"));
252     query.addBindValue(namespaceName);
253     query.exec();
254 
255     return query.next();
256 }
257 
insertDoc(const QString & namespaceName,const QString & attributes,const QString & url,const QString & title,const QString & contents)258 void Writer::insertDoc(const QString &namespaceName,
259                        const QString &attributes,
260                        const QString &url,
261                        const QString &title,
262                        const QString &contents)
263 {
264     m_namespaces.append(namespaceName);
265     m_attributes.append(attributes);
266     m_urls.append(url);
267     m_titles.append(title);
268     m_contents.append(contents);
269 }
270 
startTransaction()271 void Writer::startTransaction()
272 {
273     if (!m_db)
274         return;
275 
276     m_needOptimize = false;
277     if (m_db && m_db->driver()->hasFeature(QSqlDriver::Transactions))
278         m_db->transaction();
279 }
280 
endTransaction()281 void Writer::endTransaction()
282 {
283     if (!m_db)
284         return;
285 
286     QSqlQuery query(*m_db);
287 
288     if (m_needOptimize) {
289         query.exec(QLatin1String("INSERT INTO titles(titles) VALUES('rebuild')"));
290         query.exec(QLatin1String("INSERT INTO contents(contents) VALUES('rebuild')"));
291     }
292 
293     if (m_db && m_db->driver()->hasFeature(QSqlDriver::Transactions))
294         m_db->commit();
295 
296     if (m_needOptimize)
297         query.exec(QLatin1String("VACUUM"));
298 }
299 
QHelpSearchIndexWriter()300 QHelpSearchIndexWriter::QHelpSearchIndexWriter()
301     : QThread()
302     , m_cancel(false)
303 {
304 }
305 
~QHelpSearchIndexWriter()306 QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
307 {
308     m_mutex.lock();
309     this->m_cancel = true;
310     m_mutex.unlock();
311 
312     wait();
313 }
314 
cancelIndexing()315 void QHelpSearchIndexWriter::cancelIndexing()
316 {
317     QMutexLocker lock(&m_mutex);
318     m_cancel = true;
319 }
320 
updateIndex(const QString & collectionFile,const QString & indexFilesFolder,bool reindex)321 void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
322                                          const QString &indexFilesFolder,
323                                          bool reindex)
324 {
325     wait();
326     QMutexLocker lock(&m_mutex);
327 
328     m_cancel = false;
329     m_reindex = reindex;
330     m_collectionFile = collectionFile;
331     m_indexFilesFolder = indexFilesFolder;
332 
333     lock.unlock();
334 
335     start(QThread::LowestPriority);
336 }
337 
// Key of the engine's custom value that stores the serialized map of
// indexed namespaces (see readIndexMap(), writeIndexMap(), clearIndexMap()).
static const char IndexedNamespacesKey[] = "FTS5IndexedNamespaces";
339 
readIndexMap(const QHelpEngineCore & engine)340 static QMap<QString, QDateTime> readIndexMap(const QHelpEngineCore &engine)
341 {
342     QMap<QString, QDateTime> indexMap;
343     QDataStream dataStream(engine.customValue(
344                 QLatin1String(IndexedNamespacesKey)).toByteArray());
345     dataStream >> indexMap;
346     return indexMap;
347 }
348 
writeIndexMap(QHelpEngineCore * engine,const QMap<QString,QDateTime> & indexMap)349 static bool writeIndexMap(QHelpEngineCore *engine,
350     const QMap<QString, QDateTime> &indexMap)
351 {
352     QByteArray data;
353 
354     QDataStream dataStream(&data, QIODevice::ReadWrite);
355     dataStream << indexMap;
356 
357     return engine->setCustomValue(
358                 QLatin1String(IndexedNamespacesKey), data);
359 }
360 
clearIndexMap(QHelpEngineCore * engine)361 static bool clearIndexMap(QHelpEngineCore *engine)
362 {
363     return engine->removeCustomValue(QLatin1String(IndexedNamespacesKey));
364 }
365 
run()366 void QHelpSearchIndexWriter::run()
367 {
368     QMutexLocker lock(&m_mutex);
369 
370     if (m_cancel)
371         return;
372 
373     const bool reindex(m_reindex);
374     const QString collectionFile(m_collectionFile);
375     const QString indexPath(m_indexFilesFolder);
376 
377     lock.unlock();
378 
379     QHelpEngineCore engine(collectionFile, nullptr);
380     if (!engine.setupData())
381         return;
382 
383     if (reindex)
384         clearIndexMap(&engine);
385 
386     emit indexingStarted();
387 
388     Writer writer(indexPath);
389 
390     while (!writer.tryInit(reindex))
391         sleep(1);
392 
393     const QStringList &registeredDocs = engine.registeredDocumentations();
394     QMap<QString, QDateTime> indexMap = readIndexMap(engine);
395 
396     if (!reindex) {
397         for (const QString &namespaceName : registeredDocs) {
398             if (indexMap.contains(namespaceName)) {
399                 const QString path = engine.documentationFileName(namespaceName);
400                 if (indexMap.value(namespaceName) < QFileInfo(path).lastModified()) {
401                     // Remove some outdated indexed stuff
402                     indexMap.remove(namespaceName);
403                     writer.removeNamespace(namespaceName);
404                 } else if (!writer.hasNamespace(namespaceName)) {
405                     // No data in fts db for namespace.
406                     // The namespace could have been removed from fts db
407                     // or the whole fts db have been removed
408                     // without removing it from indexMap.
409                     indexMap.remove(namespaceName);
410                 }
411             } else {
412                 // Needed in case namespaceName was removed from indexMap
413                 // without removing it from fts db.
414                 // May happen when e.g. qch file was removed manually
415                 // without removing fts db.
416                 writer.removeNamespace(namespaceName);
417             }
418         // TODO: we may also detect if there are any other data
419         // and remove it
420         }
421     } else {
422         indexMap.clear();
423     }
424 
425     for (const QString &namespaceName : indexMap.keys()) {
426         if (!registeredDocs.contains(namespaceName)) {
427             indexMap.remove(namespaceName);
428             writer.removeNamespace(namespaceName);
429         }
430     }
431 
432     for (const QString &namespaceName : registeredDocs) {
433         lock.relock();
434         if (m_cancel) {
435             // store what we have done so far
436             writeIndexMap(&engine, indexMap);
437             writer.endTransaction();
438             emit indexingFinished();
439             return;
440         }
441         lock.unlock();
442 
443         // if indexed, continue
444         if (indexMap.contains(namespaceName))
445             continue;
446 
447         const QString fileName = engine.documentationFileName(namespaceName);
448         QHelpDBReader reader(fileName, QHelpGlobal::uniquifyConnectionName(
449                                  fileName, this), nullptr);
450         if (!reader.init())
451             continue;
452 
453         const QString virtualFolder = reader.virtualFolder();
454 
455         const QList<QStringList> &attributeSets =
456             engine.filterAttributeSets(namespaceName);
457 
458         for (const QStringList &attributes : attributeSets) {
459             const QString &attributesString = attributes.join(QLatin1Char('|'));
460 
461             const QMap<QString, QByteArray> htmlFiles
462                     = reader.filesData(attributes, QLatin1String("html"));
463             const QMap<QString, QByteArray> htmFiles
464                     = reader.filesData(attributes, QLatin1String("htm"));
465             const QMap<QString, QByteArray> txtFiles
466                     = reader.filesData(attributes, QLatin1String("txt"));
467 
468             QMultiMap<QString, QByteArray> files = htmlFiles;
469             files.unite(htmFiles);
470             files.unite(txtFiles);
471 
472             for (auto it = files.cbegin(), end = files.cend(); it != end ; ++it) {
473                 lock.relock();
474                 if (m_cancel) {
475                     // store what we have done so far
476                     writeIndexMap(&engine, indexMap);
477                     writer.endTransaction();
478                     emit indexingFinished();
479                     return;
480                 }
481                 lock.unlock();
482 
483                 const QString &file = it.key();
484                 const QByteArray &data = it.value();
485 
486                 if (data.isEmpty())
487                     continue;
488 
489                 QUrl url;
490                 url.setScheme(QLatin1String("qthelp"));
491                 url.setAuthority(namespaceName);
492                 url.setPath(QLatin1Char('/') + virtualFolder + QLatin1Char('/') + file);
493 
494                 if (url.hasFragment())
495                     url.setFragment(QString());
496 
497                 const QString &fullFileName = url.toString();
498                 if (!fullFileName.endsWith(QLatin1String(".html"))
499                         && !fullFileName.endsWith(QLatin1String(".htm"))
500                         && !fullFileName.endsWith(QLatin1String(".txt"))) {
501                     continue;
502                 }
503 
504                 QTextStream s(data);
505                 const QString &en = QHelpGlobal::codecFromData(data);
506                 s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));
507 
508                 const QString &text = s.readAll();
509                 if (text.isEmpty())
510                     continue;
511 
512                 QString title;
513                 QString contents;
514                 if (fullFileName.endsWith(QLatin1String(".txt"))) {
515                     title = fullFileName.mid(fullFileName.lastIndexOf(QLatin1Char('/')) + 1);
516                     contents = text.toHtmlEscaped();
517                 } else {
518                     QTextDocument doc;
519                     doc.setHtml(text);
520 
521                     title = doc.metaInformation(QTextDocument::DocumentTitle).toHtmlEscaped();
522                     contents = doc.toPlainText().toHtmlEscaped();
523                 }
524 
525                 writer.insertDoc(namespaceName, attributesString, fullFileName, title, contents);
526             }
527         }
528         writer.flush();
529         const QString &path = engine.documentationFileName(namespaceName);
530         indexMap.insert(namespaceName, QFileInfo(path).lastModified());
531     }
532 
533     writeIndexMap(&engine, indexMap);
534 
535     writer.endTransaction();
536     emit indexingFinished();
537 }
538 
}   // namespace qt
540 }   // namespace fulltextsearch
541 
542 QT_END_NAMESPACE
543